Reference: http://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_model_selection.html

In [71]:
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC
from sklearn import datasets

In [72]:
## 前回でも使った`diabetes`のデータ・セットを読み込み
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

In [73]:
X.shape

(442L, 10L)

In [74]:
X[1,:]

array([-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
       -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405])

In [75]:
y.shape

(442L,)

In [76]:
y[1]

75.0

In [77]:
rng = np.random.RandomState(42)

In [78]:
type(rng)

mtrand.RandomState

In [79]:
rng.randn(X.shape[0], 14)

array([[ 0.49671415, -0.1382643 ,  0.64768854, ..., -0.46572975,
         0.24196227, -1.91328024],
       [-1.72491783, -0.56228753, -1.01283112, ...,  0.11092259,
        -1.15099358,  0.37569802],
       [-0.60063869, -0.29169375, -0.60170661, ...,  0.19686124,
         0.73846658,  0.17136828],
       ..., 
       [ 0.06364906, -0.37177901, -1.47761241, ..., -0.91801906,
         0.09237679, -1.56462213],
       [ 0.14342657,  0.87152439, -1.25119914, ...,  0.13249714,
        -0.38148941, -1.32049344],
       [ 0.33748207, -0.26330781, -0.15702483, ..., -0.7487998 ,
         0.41680157,  1.16712079]])

In [80]:
rng.randn(X.shape[0], 14).shape

(442L, 14L)

In [81]:
## 11列目から14列分のrandom値をくっつける
# add some bad features
X = np.c_[X, rng.randn(X.shape[0], 14)]

In [82]:
X.shape

(442L, 24L)

In [83]:
X[1,:]

array([ -1.88201653e-03,  -4.46416365e-02,  -5.14740612e-02,
        -2.63278347e-02,  -8.44872411e-03,  -1.91633397e-02,
         7.44115641e-02,  -3.94933829e-02,  -6.83297436e-02,
        -9.22040496e-02,   1.22857208e+00,   2.20224327e+00,
         1.33547390e+00,  -7.75379030e-01,  -6.95809611e-01,
        -1.59328578e-01,   1.78455889e+00,  -1.65424052e-01,
        -1.32750749e+00,  -5.01177137e-01,   7.57056002e-01,
         6.89940277e-01,  -2.00216666e-01,   6.83414674e-01])

In [58]:
X[1,:]

array([ -1.88201653e-03,  -4.46416365e-02,  -5.14740612e-02,
        -2.63278347e-02,  -8.44872411e-03,  -1.91633397e-02,
         7.44115641e-02,  -3.94933829e-02,  -6.83297436e-02,
        -9.22040496e-02,   1.22857208e+00,   2.20224327e+00,
         1.33547390e+00,  -7.75379030e-01,  -6.95809611e-01,
        -1.59328578e-01,   1.78455889e+00,  -1.65424052e-01,
        -1.32750749e+00,  -5.01177137e-01,   7.57056002e-01,
         6.89940277e-01,  -2.00216666e-01,   6.83414674e-01,
         1.35734150e+00,  -1.69893202e-01,   5.37365425e-01,
         6.72720488e-01,  -6.28298492e-01,  -1.97528604e-01,
         1.33448235e+00,  -1.11719767e+00,   2.12791807e+00,
        -7.44872785e-01,  -1.17951746e+00,   1.32309021e-01,
         2.20834826e+00,  -1.91096594e+00])

In [85]:
np.sum(X ** 2, axis=0)

array([   1.        ,    1.        ,    1.        ,    1.        ,
          1.        ,    1.        ,    1.        ,    1.        ,
          1.        ,    1.        ,  424.60893796,  443.40316548,
        492.20106552,  426.11576877,  457.36719623,  424.08200565,
        459.43518669,  414.97417405,  455.59711474,  482.24199167,
        502.81891152,  447.26176137,  463.55135182,  387.5052586 ])

In [59]:
## データの正規化(random部分のの追加部分を元の成分と同様の正規化)
X /= np.sqrt(np.sum(X ** 2, axis=0))

In [60]:
X.shape

(442L, 38L)

In [61]:
X[1,:]

array([-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
       -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405,
        0.05962194,  0.10458416,  0.06019551, -0.03756213, -0.03253553,
       -0.00773693,  0.08325662, -0.0081206 , -0.06219371, -0.02282226,
        0.03376154,  0.0326235 , -0.00929932,  0.03471726,  0.06682611,
       -0.00806612,  0.0265543 ,  0.03407526, -0.0282502 , -0.00963381,
        0.06068646, -0.0529758 ,  0.0997042 , -0.03736597, -0.05751877,
        0.00679859,  0.10103218, -0.09219085])

In [62]:
np.sum(X ** 2, axis=0)

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [63]:
(X ** 2)[1,:]

array([  3.54198621e-06,   1.99287571e-03,   2.64957898e-03,
         6.93154881e-04,   7.13809391e-05,   3.67233590e-04,
         5.53708087e-03,   1.55972729e-03,   4.66895386e-03,
         8.50158677e-03,   3.55477529e-03,   1.09378458e-02,
         3.62349994e-03,   1.41091385e-03,   1.05856086e-03,
         5.98601102e-05,   6.93166418e-03,   6.59441448e-05,
         3.86805816e-03,   5.20855767e-04,   1.13984136e-03,
         1.06429305e-03,   8.64773954e-05,   1.20528846e-03,
         4.46572936e-03,   6.50622334e-05,   7.05130875e-04,
         1.16112318e-03,   7.98073832e-04,   9.28103690e-05,
         3.68284637e-03,   2.80643572e-03,   9.94092817e-03,
         1.39621588e-03,   3.30840865e-03,   4.62208218e-05,
         1.02075018e-02,   8.49915288e-03])

In [64]:
X[1,:]

array([-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
       -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405,
        0.05962194,  0.10458416,  0.06019551, -0.03756213, -0.03253553,
       -0.00773693,  0.08325662, -0.0081206 , -0.06219371, -0.02282226,
        0.03376154,  0.0326235 , -0.00929932,  0.03471726,  0.06682611,
       -0.00806612,  0.0265543 ,  0.03407526, -0.0282502 , -0.00963381,
        0.06068646, -0.0529758 ,  0.0997042 , -0.03736597, -0.05751877,
        0.00679859,  0.10103218, -0.09219085])

In [65]:
np.sum(X ** 2, axis=0).shape

(38L,)

In [66]:
np.sqrt(np.sum(X ** 2, axis=0)).shape

(38L,)

In [67]:
np.sqrt(np.sum(X ** 2, axis=0))

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [68]:
X /= np.sqrt(np.sum(X ** 2, axis=0))

In [69]:
X.shape

(442L, 38L)

In [70]:
X[1,:]

array([-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
       -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405,
        0.05962194,  0.10458416,  0.06019551, -0.03756213, -0.03253553,
       -0.00773693,  0.08325662, -0.0081206 , -0.06219371, -0.02282226,
        0.03376154,  0.0326235 , -0.00929932,  0.03471726,  0.06682611,
       -0.00806612,  0.0265543 ,  0.03407526, -0.0282502 , -0.00963381,
        0.06068646, -0.0529758 ,  0.0997042 , -0.03736597, -0.05751877,
        0.00679859,  0.10103218, -0.09219085])