<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Packages" data-toc-modified-id="Packages-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Packages</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Functions</a></span><ul class="toc-item"><li><span><a href="#Classification" data-toc-modified-id="Classification-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Classification</a></span></li><li><span><a href="#Regression" data-toc-modified-id="Regression-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Regression</a></span></li><li><span><a href="#Bayesian-loss" data-toc-modified-id="Bayesian-loss-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Bayesian loss</a></span></li></ul></li><li><span><a href="#Data" data-toc-modified-id="Data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Data</a></span></li><li><span><a href="#Grid-Search" data-toc-modified-id="Grid-Search-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Grid Search</a></span></li><li><span><a href="#Bayesian-optimization" data-toc-modified-id="Bayesian-optimization-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Bayesian optimization</a></span></li></ul></div>

___
# Packages

In [1]:
%load_ext nb_black

import sys

import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import log_loss
from sklearn.linear_model import BayesianRidge, LinearRegression, Ridge
from scipy.optimize import approx_fprime, minimize
from scipy.special import expit
from scipy.stats import multivariate_normal

sys.path.append("/home/capitaine/01_projects/2023/cr_model/cr-model-research")
from cr_model.model.bayesian_models import BayesianLogisticRegression

2024-03-24 16:51:47.494329: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-24 16:51:47.496058: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-24 16:51:47.520283: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-24 16:51:47.520313: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-24 16:51:47.520988: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

<IPython.core.display.Javascript object>

___
# Functions

## Classification

In [2]:
def loss(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
):
    linear_pred = np.dot(X, beta)
    y_pred = expit(linear_pred)

    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    loss = (
        log_loss(y, y_pred, sample_weight=weights, normalize=False)
        + 0.5 * np.dot(np.multiply(q, np.subtract(beta, m)), np.subtract(beta, m))
        + 0.5 * (np.sum(np.log(1 / q)) + len(q) * np.log(2 * np.pi))
    )

    return loss


def jac(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
):
    linear_pred = np.dot(X, beta)
    y_pred = expit(linear_pred)

    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    jac = np.add(
        -np.dot(weights * (y - y_pred), X),
        np.multiply(q, np.subtract(beta, m)),
    )

    return jac


def hess(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
):
    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    linear_pred = np.dot(X, beta)
    y_pred = expit(linear_pred)

    hess = np.diag(q) + np.matmul(
        np.matmul(
            X.T,
            np.diag(np.multiply(weights, np.multiply(y_pred, 1 - y_pred))),
        ),
        X,
    )
    return hess

<IPython.core.display.Javascript object>

In [3]:
beta = np.random.normal(size=4)
X = np.random.normal(size=(10, 4))
y = np.random.choice([0, 1], size=10)
m = np.random.normal(size=4)
q = np.exp(np.random.normal(size=4))
r = 1

<IPython.core.display.Javascript object>

In [4]:
approx_fprime(
    beta,
    loss,
    1.4901161193847656e-08,
    *(X, y, m, q, r),
)

array([-4.21219373, -8.2543025 ,  1.67136264,  1.19512987])

<IPython.core.display.Javascript object>

In [5]:
jac(beta, X, y, m, q, r)

array([-4.21219364, -8.25430268,  1.6713624 ,  1.19512984])

<IPython.core.display.Javascript object>

In [6]:
approx_fprime(
    beta,
    jac,
    1.4901161193847656e-08,
    *(X, y, m, q, r),
)

array([[ 2.03404027,  0.65171725, -0.03051043,  0.45034492],
       [ 0.65171719,  3.54062629, -0.16870368,  0.43155241],
       [-0.03051038, -0.16870359,  1.10525566, -0.26152341],
       [ 0.45034501,  0.43155238, -0.26152341,  3.80380088]])

<IPython.core.display.Javascript object>

In [7]:
hess(beta, X, y, m, q, r)

array([[ 2.03404035,  0.65171727, -0.03051039,  0.45034501],
       [ 0.65171727,  3.54062639, -0.16870359,  0.43155239],
       [-0.03051039, -0.16870359,  1.10525566, -0.26152342],
       [ 0.45034501,  0.43155239, -0.26152342,  3.80380089]])

<IPython.core.display.Javascript object>

## Regression

In [141]:
def loss(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
    scale=None,
):
    linear_pred = np.dot(X, beta)
    y_pred = linear_pred

    if scale is None:
        scale = _estimate_scale(X, y, y_pred, r, bool(np.all(q == 0)))

    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    loss = (
        np.sum(
            np.multiply(
                weights,
                1 / 2 * np.divide(np.power(np.subtract(y, y_pred), 2), scale),
            )
        )
        - np.sum(np.log(weights)) / 2
        + 0.5 * len(y) * (np.log(scale) + np.log(2 * np.pi))
        + np.sum(
            1
            / 2
            * np.multiply(
                np.power(np.subtract(beta, m), 2),
                q,
            )
        )
        + 0.5 * (np.sum(np.log(1 / q)) + len(q) * np.log(2 * np.pi))
    )

    return loss


def jac(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
    scale=None,
):
    linear_pred = np.dot(X, beta)
    y_pred = linear_pred

    if scale is None:
        scale = _estimate_scale(X, y, y_pred, r, bool(np.all(q == 0)))

    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    jac = -np.sum(
        np.multiply(
            np.expand_dims(
                np.multiply(
                    weights,
                    np.divide(np.subtract(y, y_pred), scale),
                ),
                axis=1,
            ),
            np.multiply(
                np.expand_dims(
                    np.array([1] * len(y_pred)),
                    axis=1,
                ),
                X,
            ),
        ),
        axis=0,
    ) + np.multiply(np.subtract(beta, m), q)

    return jac


def hess(
    beta: np.ndarray,
    X: np.ndarray,
    y: np.ndarray,
    m: np.ndarray,
    q: np.ndarray,
    r: float,
    scale=None,
):
    linear_pred = np.dot(X, beta)
    y_pred = linear_pred

    if scale is None:
        scale = _estimate_scale(X, y, y_pred, r, bool(np.all(q == 0)))

    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r

    hess = np.add(
        np.matmul(
            np.matmul(X.T, np.diag(weights) / scale),
            X,
        ),
        np.diag(q),
    )
    return hess


def _estimate_scale(
    X: np.ndarray,
    y: np.ndarray,
    y_pred: np.ndarray,
    r: float,
    without_prior: bool,
):
    weights = np.ones(y.shape)
    weights[y == 0] = 1 / r
    if without_prior:
        df_resid = np.sum(weights) - X.shape[1]
    else:
        df_resid = np.sum(weights)

    if df_resid <= 0:
        raise ValueError(f"Scale estimation is wrong since df_resid={df_resid}.")

    return np.sum(np.multiply(np.power(y - y_pred, 2), weights)) / df_resid

<IPython.core.display.Javascript object>

In [152]:
beta = np.random.normal(size=4)
X = np.random.normal(size=(10, 4))
y = np.random.normal(size=10)
m = np.random.normal(size=4)
q = np.exp(np.random.normal(size=4))
r = 1

<IPython.core.display.Javascript object>

In [153]:
loss(beta, *(X, y, m, q, r, 2))

27.601855668276723

<IPython.core.display.Javascript object>

In [154]:
multivariate_normal.logpdf(
    y, mean=np.dot(X, beta), cov=2 * np.eye(len(y))
) + multivariate_normal.logpdf(beta, mean=m, cov=np.diag(1 / q))

-27.601855668276716

<IPython.core.display.Javascript object>

In [155]:
approx_fprime(
    beta,
    loss,
    1.4901161193847656e-08,
    *(X, y, m, q, r, 2),
)

array([-0.93074703,  2.35958982, -6.39777231, -2.75568271])

<IPython.core.display.Javascript object>

In [156]:
jac(beta, X, y, m, q, r, 2)

array([-0.93074679,  2.35959014, -6.39777206, -2.75568278])

<IPython.core.display.Javascript object>

In [157]:
approx_fprime(
    beta,
    jac,
    1.4901161193847656e-08,
    *(X, y, m, q, r, 2),
)

array([[ 6.62139761,  3.1840551 , -1.7851456 ,  0.94606332],
       [ 3.18405515,  5.21584272, -1.29461098,  0.94209462],
       [-1.78514564, -1.29461098,  3.51416832,  0.13655412],
       [ 0.94606331,  0.94209462,  0.13655412,  3.53591296]])

<IPython.core.display.Javascript object>

In [158]:
hess(beta, X, y, m, q, r, 2)

array([[ 6.6213976 ,  3.18405509, -1.78514563,  0.94606329],
       [ 3.18405509,  5.21584269, -1.29461102,  0.94209462],
       [-1.78514563, -1.29461102,  3.51416831,  0.13655412],
       [ 0.94606329,  0.94209462,  0.13655412,  3.53591296]])

<IPython.core.display.Javascript object>

## Bayesian loss

In [23]:
def func_with_bias(log_sigma2, X, y):
    n_feat = X.shape[1]
    log_sigma2 = np.array([log_sigma2[0]] * n_feat)
    diag_sigma2 = np.exp(log_sigma2)

    m = np.array([0] * n_feat)
    q = 1 / diag_sigma2
    r = 1

    res = minimize(
        loss,
        np.array([0] * n_feat),
        args=(X, y, m, q, r),
        method="L-BFGS-B",
        jac=jac,
    )

    theta_star = res.x

    H = hess(theta_star, X, y, m, q, r)

    out = (
        loss(theta_star, X, y, m, q, r)
        - 0.5 * n_feat * np.log(2 * np.pi)
        + 0.5 * np.linalg.slogdet(H)[1]
    )
    return out


def jac_func_with_bias(log_sigma2, X, y):
    h = np.log(1 + 1e-1)
    jac_list = []
    for ii in range(len(log_sigma2)):
        xk = np.copy(log_sigma2)
        xk[ii] += h
        fk_plus_h = func_with_bias(xk, X, y)
        xk[ii] -= 2 * h
        fk_minus_h = func_with_bias(xk, X, y)
        jac_list.append((fk_plus_h - fk_minus_h) / 2 * h)
    return np.array(jac_list)

<IPython.core.display.Javascript object>

___
# Data

In [8]:
data = pd.read_csv(
    "/home/capitaine/01_projects/2024/bayesian_optimization/data/titanic/train.csv"
)
y = data.pop("Survived")
data["CabinCat"] = data["Cabin"].astype(str).str[0]
data["Embarked"] = data["Embarked"].fillna("unknown")
X = data[["Pclass", "Sex", "Embarked", "CabinCat", "Age", "SibSp", "Parch"]].copy()

<IPython.core.display.Javascript object>

___
# Grid Search

In [29]:
preprocessor = ColumnTransformer(
    [
        (
            "categorical_features_wo_nan",
            OneHotEncoder(handle_unknown="ignore", sparse_output=False),
            ["Pclass", "Sex", "Embarked", "CabinCat"],
        ),
        (
            "numerical_features_w_nan",
            Pipeline(
                [
                    ("imputer", SimpleImputer(strategy="mean")),
                ]
            ),
            ["Age"],
        ),
    ],
    remainder="passthrough",
)
preprocessor.set_output(transform="pandas")
pipe = Pipeline(
    [
        ("preprocessing", preprocessor),
        ("classifier", LogisticRegression(fit_intercept=False)),
    ]
)

<IPython.core.display.Javascript object>

In [30]:
param_grid = {
    "classifier__C": [
        0.001,
        0.1,
        0.2,
        0.5,
        1,
        1.1,
        1.2,
        1.3,
        1.4,
        1.5,
        1.6,
        2,
        3,
        4,
        5,
        6,
        7,
        8,
        9,
        10,
        100,
        1000,
    ],
    "classifier__fit_intercept": [False],
}

<IPython.core.display.Javascript object>

In [31]:
grid_search = GridSearchCV(pipe, param_grid, return_train_score=True)

<IPython.core.display.Javascript object>

In [32]:
grid_search.fit(X, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

<IPython.core.display.Javascript object>

In [33]:
pd.DataFrame(grid_search.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__C,param_classifier__fit_intercept,params,split0_test_score,split1_test_score,split2_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.023432,0.010684,0.007818,0.001402,0.001,False,"{'classifier__C': 0.001, 'classifier__fit_inte...",0.620112,0.61236,0.623596,...,0.618404,0.005097,22,0.617978,0.618513,0.615708,0.615708,0.618513,0.617284,0.001301
1,0.023018,0.002659,0.010216,0.00665,0.1,False,"{'classifier__C': 0.1, 'classifier__fit_interc...",0.798883,0.797753,0.797753,...,0.796855,0.01607,14,0.811798,0.820477,0.816269,0.805049,0.805049,0.811728,0.006106
2,0.032645,0.008438,0.01331,0.007797,0.2,False,"{'classifier__C': 0.2, 'classifier__fit_interc...",0.804469,0.797753,0.797753,...,0.799096,0.017968,9,0.81882,0.824684,0.813464,0.806452,0.805049,0.813694,0.007407
3,0.056861,0.033233,0.008583,0.001173,0.5,False,"{'classifier__C': 0.5, 'classifier__fit_interc...",0.793296,0.792135,0.803371,...,0.801356,0.016003,1,0.813202,0.824684,0.817672,0.814867,0.812062,0.816497,0.004508
4,0.041501,0.010407,0.008204,0.000278,1.0,False,"{'classifier__C': 1, 'classifier__fit_intercep...",0.793296,0.792135,0.803371,...,0.801356,0.012951,1,0.808989,0.821879,0.826087,0.819074,0.809257,0.817057,0.006853
5,0.040746,0.01371,0.010899,0.007198,1.1,False,"{'classifier__C': 1.1, 'classifier__fit_interc...",0.793296,0.792135,0.797753,...,0.799109,0.013839,5,0.808989,0.821879,0.824684,0.819074,0.813464,0.817618,0.005691
6,0.074257,0.041819,0.013516,0.007595,1.2,False,"{'classifier__C': 1.2, 'classifier__fit_interc...",0.793296,0.792135,0.792135,...,0.797985,0.014128,10,0.808989,0.821879,0.824684,0.820477,0.814867,0.818179,0.005599
7,0.040473,0.008164,0.01466,0.008732,1.3,False,"{'classifier__C': 1.3, 'classifier__fit_interc...",0.793296,0.792135,0.797753,...,0.800232,0.012971,3,0.808989,0.823282,0.826087,0.820477,0.812062,0.818179,0.006569
8,0.039534,0.015328,0.015991,0.012289,1.4,False,"{'classifier__C': 1.4, 'classifier__fit_interc...",0.798883,0.792135,0.792135,...,0.797979,0.011793,11,0.808989,0.821879,0.826087,0.820477,0.810659,0.817618,0.006647
9,0.061271,0.018972,0.00872,0.000915,1.5,False,"{'classifier__C': 1.5, 'classifier__fit_interc...",0.798883,0.792135,0.797753,...,0.799102,0.011445,6,0.810393,0.823282,0.823282,0.820477,0.813464,0.81818,0.005295


<IPython.core.display.Javascript object>

In [23]:
grid_search.best_estimator_[-1].coef_

array([[ 1.11651034,  0.69121408, -0.40844652,  1.98443979, -0.58516189,
         0.66465723,  0.49527933,  0.15284235,  0.08649899,  0.13085689,
         0.33989409, -0.09199886,  0.5444967 ,  0.86483199,  0.37754538,
        -0.21288019, -0.08767163, -0.46579647, -0.03348929, -0.26217552,
        -0.06106408]])

<IPython.core.display.Javascript object>

In [24]:
preprocessor.fit_transform(X)

Unnamed: 0,categorical_features_wo_nan__Pclass_1,categorical_features_wo_nan__Pclass_2,categorical_features_wo_nan__Pclass_3,categorical_features_wo_nan__Sex_female,categorical_features_wo_nan__Sex_male,categorical_features_wo_nan__Embarked_C,categorical_features_wo_nan__Embarked_Q,categorical_features_wo_nan__Embarked_S,categorical_features_wo_nan__Embarked_unknown,categorical_features_wo_nan__CabinCat_A,...,categorical_features_wo_nan__CabinCat_C,categorical_features_wo_nan__CabinCat_D,categorical_features_wo_nan__CabinCat_E,categorical_features_wo_nan__CabinCat_F,categorical_features_wo_nan__CabinCat_G,categorical_features_wo_nan__CabinCat_T,categorical_features_wo_nan__CabinCat_n,numerical_features_w_nan__Age,remainder__SibSp,remainder__Parch
0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,22.000000,1,0
1,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,38.000000,1,0
2,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,26.000000,0,0
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,35.000000,1,0
4,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,35.000000,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,27.000000,0,0
887,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.000000,0,0
888,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,29.699118,1,2
889,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,26.000000,0,0


<IPython.core.display.Javascript object>

___
# Bayesian optimization

In [13]:
def callback(intermediate_result):
    print(f"fun={intermediate_result.fun}")

<IPython.core.display.Javascript object>

In [24]:
res = minimize(
    func_with_bias,
    np.array([0.0]),
    args=(
        preprocessor.fit_transform(X).to_numpy(),
        y.to_numpy(),
    ),
    method="L-BFGS-B",
    jac=jac_func_with_bias,
    callback=callback,
)

fun=416.015700488111
fun=415.7457521852242
fun=415.7028985462279
fun=415.70233681909383
fun=415.70095393097137
fun=415.7002621046415
fun=415.6994301589399


<IPython.core.display.Javascript object>

In [25]:
res

  message: ABNORMAL_TERMINATION_IN_LNSRCH
  success: False
   status: 2
      fun: 415.7002601196556
        x: [-3.364e-01]
      nit: 7
      jac: [ 7.452e-05]
     nfev: 64
     njev: 64
 hess_inv: <1x1 LbfgsInvHessProduct with dtype=float64>

<IPython.core.display.Javascript object>

In [27]:
1 / np.exp(res.x)

array([1.39996636])

<IPython.core.display.Javascript object>

In [18]:
X1 = preprocessor.fit_transform(X).to_numpy()
y1 = y.to_numpy()

<IPython.core.display.Javascript object>

In [182]:
res = minimize(
    func_with_bias,
    np.array([0.0]),
    args=(
        X1,
        y1,
    ),
    method="L-BFGS-B",
    jac=jac_func_with_bias,
    callback=callback,
)

fun=917.3936949762424
fun=909.2815416894933
fun=908.7238866764843
fun=908.5830502425201


<IPython.core.display.Javascript object>

In [183]:
res

  message: CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
  success: True
   status: 0
      fun: 908.5830502425201
        x: [-3.176e+00]
      nit: 4
      jac: [ 6.802e-06]
     nfev: 8
     njev: 8
 hess_inv: <1x1 LbfgsInvHessProduct with dtype=float64>

<IPython.core.display.Javascript object>

In [184]:
np.exp(res.x)

array([0.04176261])

<IPython.core.display.Javascript object>

In [185]:
model = BayesianRidge(
    alpha_1=1 / 1 * 1e6,
    alpha_2=1e6,
    lambda_1=1e-6,
    lambda_2=1e-6,
    fit_intercept=False,
)
model.fit(X1, y1)

<IPython.core.display.Javascript object>

In [191]:
np.log(1 / model.lambda_)

-3.189134422067746

<IPython.core.display.Javascript object>

In [20]:
for log_sigma2 in [
    -4,
    -3,
    -2,
    -1,
    0,
    1,
    2,
    3,
    4,
    5,
]:
    print(
        func_with_bias(
            np.array([log_sigma2]),
            X1,
            y1,
        )
    )

481.2375065318942
447.80239593850894
426.7367949669429
417.1934379524614
416.017499735816
419.8408348902286
426.16091279686424
433.5910798852872
441.47220323542643
449.58914179809045


<IPython.core.display.Javascript object>

In [18]:
1 / np.exp(res.x)

array([3.81102697e-22])

<IPython.core.display.Javascript object>

In [127]:
res

  message: CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
  success: True
   status: 0
      fun: 329.1250092965352
        x: [ 4.932e+01]
      nit: 3
      jac: [ 0.000e+00]
     nfev: 6
     njev: 6
 hess_inv: <1x1 LbfgsInvHessProduct with dtype=float64>

<IPython.core.display.Javascript object>

In [130]:
loss(np.array([1.0] * 21), preprocessor.fit_transform(X), y, np.array([2.0] * 21))

17095.442744711374

<IPython.core.display.Javascript object>

In [6]:
approx_fprime(
    np.array([1.0] * 21),
    jac,
    1.4901161193847656e-08,
    *(preprocessor.fit_transform(X), y, np.array([2.0] * 21)),
)

array([[5.00532150e-01, 0.00000000e+00, 0.00000000e+00, 1.23977661e-04,
        4.08172607e-04, 0.00000000e+00, 0.00000000e+00, 5.32150269e-04,
        0.00000000e+00, 4.57763672e-05, 0.00000000e+00, 4.86373901e-04,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 7.62939453e-04, 4.86373901e-04,
        1.06430054e-03],
       [0.00000000e+00, 5.05719185e-01, 0.00000000e+00, 4.68254089e-04,
        5.24997711e-03, 9.55581665e-04, 0.00000000e+00, 4.76360321e-03,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 8.10623169e-04, 0.00000000e+00,
        0.00000000e+00, 4.90856171e-03, 6.50882721e-03, 4.06169891e-03,
        7.76195526e-03],
       [0.00000000e+00, 0.00000000e+00, 5.09937286e-01, 4.88662720e-03,
        5.05065918e-03, 6.29043579e-03, 1.90734863e-05, 3.62777710e-03,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.0000

<IPython.core.display.Javascript object>

In [9]:
r = 1
beta = np.array([1.0] * 21)
m = np.array([0.0] * 21)
q = 1 / np.array([2.0] * 21)

weights = np.ones(y.shape)
weights[y == 0] = 1 / r

linear_pred = np.dot(preprocessor.fit_transform(X), beta)
y_pred = BayesianLogisticRegression().link._inv_link(linear_pred)

hess = np.diag(q) + np.matmul(
    np.matmul(
        preprocessor.fit_transform(X).T.to_numpy(),
        np.diag(np.multiply(weights, np.multiply(y_pred, 1 - y_pred))),
    ),
    preprocessor.fit_transform(X).to_numpy(),
)

<IPython.core.display.Javascript object>

In [10]:
hess

array([[5.00531936e-01, 0.00000000e+00, 0.00000000e+00, 1.23386632e-04,
        4.08549642e-04, 1.39942169e-09, 0.00000000e+00, 5.31934874e-04,
        0.00000000e+00, 4.53958078e-05, 2.12966051e-08, 4.86518289e-04,
        8.30318036e-10, 3.97397670e-11, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 1.03952402e-11, 7.62715736e-04, 4.86534540e-04,
        1.06386289e-03],
       [0.00000000e+00, 5.05719221e-01, 0.00000000e+00, 4.68690204e-04,
        5.25053032e-03, 9.55623353e-04, 3.99680289e-15, 4.76359717e-03,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        1.87960758e-12, 3.68594044e-14, 8.10555835e-04, 0.00000000e+00,
        0.00000000e+00, 4.90866468e-03, 6.50974333e-03, 4.06259720e-03,
        7.76258607e-03],
       [0.00000000e+00, 0.00000000e+00, 5.09934095e-01, 4.88596690e-03,
        5.04812852e-03, 6.28940733e-03, 1.91256391e-05, 3.62556246e-03,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.0000

<IPython.core.display.Javascript object>