In [1]:
import rolch
import numpy as np
from sklearn.datasets import load_diabetes, make_regression
import matplotlib.pyplot as plt
from pprint import pprint

# Print arrays with three decimals and without scientific notation so the
# coefficient tables printed further down stay compact.
np.set_printoptions(precision=3, suppress=True)
# Show the installed rolch version next to the results.
print(rolch.__version__)

0.1.9


# Batch Estimation

In [2]:
## Diabetes data set
## No intercept column is added to X in this cell; the estimators below are
## constructed with fit_intercept=True instead.
## NOTE(review): the original note said the intercept "will not be
## regularized" -- presumably this refers to the LASSO fit; confirm against
## the rolch documentation.

X, y = load_diabetes(return_X_y=True)

## OLS

In [3]:
# One entry per modelled distribution parameter (keys 0 and 1 for the Normal
# used here). "all" presumably selects every column of X for that parameter.
equation = dict.fromkeys(range(2), "all")

# Batch estimator fitted with plain OLS.
online_gamlss_ols = rolch.OnlineGamlss(
    equation=equation,
    distribution=rolch.DistributionNormal(),
    fit_intercept=True,
    method="ols",
)

online_gamlss_ols.fit(X=X, y=y)

# betas is a dict keyed like `equation`; pprint shows one array per key.
print("OLS Coefficients \n")
pprint(online_gamlss_ols.betas)

OLS Coefficients 

{0: array([152.004,  -0.621, -12.37 ,  23.288,  15.373, -28.417,  15.796,
        -0.309,   6.318,  33.086,   2.393]),
 1: array([ 3.986, -0.031, -0.02 ,  0.067,  0.045,  0.048, -0.084,  0.007,
        0.043, -0.024,  0.036])}


## LASSO

In [4]:
dist = rolch.DistributionT()

# One entry per modelled distribution parameter (keys 0, 1 and 2).
# Instead of "all" a value can also be "intercept" or a numpy array with
# indices / booleans.
equation = dict.fromkeys(range(3), "all")

online_gamlss_lasso = rolch.OnlineGamlss(
    equation=equation,
    distribution=dist,
    fit_intercept=True,
    method="lasso",
    # NOTE(review): presumably switches off the inner RSS tolerance check -- confirm.
    rss_tol_inner=np.inf,
    estimation_kwargs={
        # Information criterion per distribution parameter; change it if you like.
        "ic": dict.fromkeys(range(dist.n_params), "bic"),
        # Changes the design of the lambda grid, again per distribution parameter.
        "lambda_eps": dict.fromkeys(range(dist.n_params), 1e-4),
    },
)
online_gamlss_lasso.fit(X=X, y=y)

# Lay the per-parameter coefficient vectors side by side: one column per
# distribution parameter, one row per coefficient.
print("LASSO Coefficients \n")
print(np.column_stack(list(online_gamlss_lasso.betas.values())))

LASSO Coefficients 

[[151.968   3.973  25.   ]
 [ -0.     -0.     -0.   ]
 [ -9.826  -0.      0.   ]
 [ 24.23    0.044  -0.   ]
 [ 13.78    0.      0.   ]
 [ -4.883  -0.     -0.   ]
 [ -0.      0.      0.   ]
 [-10.79   -0.026   0.   ]
 [  0.      0.      0.   ]
 [ 24.842   0.     -0.   ]
 [  2.194   0.      0.   ]]


# Incremental Fit

In [5]:
# Fit on every observation except the last one; the held-out row is fed to
# update() in the next cell.
#
# The distribution is bound to `dist` in this cell and used both for the
# estimator and for sizing the per-parameter "ic" dict, so the two always
# describe the same object instead of relying on a `dist` left over from an
# earlier cell.
dist = rolch.DistributionT()
online_gamlss_lasso = rolch.OnlineGamlss(
    distribution=dist,
    method="lasso",
    equation=equation,  # same equation dict as in the LASSO section above
    # Explicit for consistency with the estimators above; the 11-row output for
    # 10 features shows an intercept was already being fitted by default.
    fit_intercept=True,
    estimation_kwargs={"ic": {i: "bic" for i in range(dist.n_params)}},
)
online_gamlss_lasso.fit(X=X[:-1, :], y=y[:-1])

# One column per distribution parameter, one row per coefficient.
print("Coefficients for the first N-1 observations \n")
print(np.vstack([*online_gamlss_lasso.betas.values()]).T)

Coefficients for the first N-1 observations 

[[152.023   3.917   2.737]
 [ -0.     -0.      0.   ]
 [-10.774  -0.      0.   ]
 [ 24.566   0.035  -0.277]
 [ 14.204   0.     -0.   ]
 [ -5.537   0.      0.   ]
 [ -0.     -0.      0.   ]
 [-10.837  -0.      0.   ]
 [  0.      0.     -0.   ]
 [ 25.509   0.     -0.   ]
 [  1.748   0.     -0.143]]


In [6]:
# Feed the held-out last observation to the already fitted estimator.
# Indexing with the list [-1] keeps X as a single-row 2-D array and y as a
# length-1 array.
# NOTE(review): presumably update() changes the estimator in place, so
# re-running this cell would apply the same observation again -- confirm.
online_gamlss_lasso.update(X[[-1], :], y[[-1]])

print("\nCoefficients after update call \n")
# NOTE(review): the table printed here is dense, while the N-1 fit above had
# many exact zeros -- verify that the regularisation chosen during update() is
# what is intended.
print(np.vstack([*online_gamlss_lasso.betas.values()]).T)


Coefficients after update call 

[[152.144   3.933   3.475]
 [ -0.71   -0.045   0.511]
 [-12.52   -0.098   0.618]
 [ 24.582   0.037  -0.223]
 [ 15.34    0.066  -0.232]
 [-33.312  -0.407   2.879]
 [ 19.224   0.384  -2.365]
 [  2.271  -0.004  -0.51 ]
 [  7.308  -0.089   0.267]
 [ 35.061   0.189  -1.479]
 [  2.397   0.044  -0.235]]
