In [1]:
import rolch
import numpy as np
from sklearn.datasets import load_diabetes

# Display arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Batch Estimation

In [2]:
## Diabetes data set: X holds the features, y the target.
## A leading column of ones is prepended as the intercept
## (will not be regularized).

X, y = load_diabetes(return_X_y=True)
intercept = np.ones((X.shape[0], 1))  # one row per observation, a single column
X = np.concatenate((intercept, X), axis=1)

## OLS

In [3]:
# Batch fit of an OnlineGamlss model with a t distribution and method "ols".
# The same design matrix X is passed three times; the printed matrix has
# three columns -- presumably one per distribution parameter (confirm
# against the rolch documentation).
online_gamlss_ols = rolch.OnlineGamlss(method="ols", distribution=rolch.DistributionT())
online_gamlss_ols.fit(y, X, X, X)

print("OLS Coefficients \n")
# Stack the per-parameter coefficient vectors and show them column-wise.
ols_coefficients = np.transpose(np.vstack(online_gamlss_ols.betas))
print(ols_coefficients)

OLS Coefficients 

[[150.933   3.912  10.817]
 [  0.165  -0.065   0.734]
 [-11.363  -0.111   2.061]
 [ 20.206   0.032  -0.178]
 [ 17.183   0.127   0.811]
 [-29.301  -0.836 -19.698]
 [ 17.043   0.796  16.558]
 [  0.224   0.083   8.152]
 [  5.192  -0.18    1.116]
 [ 34.289   0.412   9.576]
 [  2.442   0.06   -0.63 ]]


## LASSO

In [4]:
# Batch fit of an OnlineGamlss model with a t distribution and method "lasso".
# "ic": "bic" is handed to the estimator through estimation_kwargs.
# The same design matrix X is passed three times; the printed matrix has
# three columns -- presumably one per distribution parameter (confirm
# against the rolch documentation).
lasso_settings = {"ic": "bic"}
online_gamlss_lasso = rolch.OnlineGamlss(
    method="lasso",
    estimation_kwargs=lasso_settings,
    distribution=rolch.DistributionT(),
)
online_gamlss_lasso.fit(y, X, X, X)

print("LASSO Coefficients \n")
# Stack the per-parameter coefficient vectors and show them column-wise.
lasso_coefficients = np.transpose(np.vstack(online_gamlss_lasso.betas))
print(lasso_coefficients)

LASSO Coefficients 

[[151.844   3.943  23.519]
 [  0.26   -0.059   0.001]
 [-11.973  -0.105   0.006]
 [ 22.29    0.052  -0.   ]
 [ 16.202   0.108  -0.001]
 [-10.965  -0.002   0.021]
 [  0.012   0.053   3.693]
 [ -6.236  -0.197  -0.006]
 [  6.183  -0.131   0.003]
 [ 27.042   0.049  -4.208]
 [  2.637   0.05    1.095]]


# Incremental Fit

In [16]:
# Fit on all observations except the last one; the held-out row is meant
# to be fed to the model afterwards via an update call.
# NOTE(review): the variable is named "lasso" but the model is built with
# method="ols" -- confirm which estimator is intended here.
all_but_last = slice(None, -1)

online_gamlss_lasso = rolch.OnlineGamlss(
    method="ols",
    # aic, bic, hqc, max (max == always select largest model --> OLS solution)
    estimation_kwargs={"ic": "bic"},
    distribution=rolch.DistributionT(),
)
online_gamlss_lasso.model_selection_on_weighted_rss = False

online_gamlss_lasso.fit(
    y[all_but_last],
    X[all_but_last, :],
    X[all_but_last, :],
    X[all_but_last, :],
)

print("Coefficients for the first N-1 observations \n")
first_fit_coefficients = np.transpose(np.vstack(online_gamlss_lasso.betas))
print(first_fit_coefficients)

print("\nRSS for the first N-1 observations \n")
print(online_gamlss_lasso.rss)

Coefficients for the first N-1 observations 

[[151.248   3.921  11.733]
 [  0.367  -0.069   1.019]
 [-11.322  -0.112   1.398]
 [ 21.003   0.023  -0.738]
 [ 17.332   0.124   1.307]
 [-34.499  -0.712 -16.854]
 [ 20.727   0.692  13.33 ]
 [  2.526   0.045   7.264]
 [  6.299  -0.175   2.158]
 [ 35.216   0.367   9.054]
 [  2.456   0.058  -0.921]]

RSS for the first N-1 observations 

{0: 1248326.2020248915, 1: 168.7563369721031, 2: 28448516.72403799}


In [17]:
# Feed the single remaining (last) observation to the already fitted model.
# Indexing with a list keeps X two-dimensional (one row) and y one-dimensional
# (one element) instead of collapsing a dimension.
last_row = [-1]
online_gamlss_lasso.update(
    y[last_row],
    X[last_row, :],
    X[last_row, :],
    X[last_row, :],
)

print("\nCoefficients after update call \n")
updated_coefficients = np.transpose(np.vstack(online_gamlss_lasso.betas))
print(updated_coefficients)

print("\nRSS after update call  \n")
print(online_gamlss_lasso.rss)


Coefficients after update call 

[[151.251   3.92   11.733]
 [  0.316  -0.067   1.019]
 [-11.312  -0.112   1.398]
 [ 20.97    0.024  -0.738]
 [ 17.273   0.126   1.307]
 [-34.722  -0.699 -16.854]
 [ 20.898   0.684  13.33 ]
 [  2.795   0.03    7.264]
 [  6.477  -0.184   2.158]
 [ 35.316   0.362   9.054]
 [  2.493   0.057  -0.921]]

RSS after update call  

{0: array([1248344.208]), 1: array([168.993]), 2: array([28448590.018])}


In [24]:
# Draw 100 values from a normal distribution with mean 3 and standard
# deviation 10. A seeded generator is used so that the draw is identical
# on every Restart & Run All instead of depending on global random state.
SEED = 42
rng = np.random.default_rng(SEED)
sample = rng.normal(loc=3, scale=10, size=100)

In [25]:
# Standard deviation of the drawn sample.
sample.std()

10.007207826055359

In [26]:
# Standard deviation after dividing every value by 2; the recorded output
# is half of the value obtained for the unscaled sample.
(sample / 2).std()

5.003603913027679