In [1]:
%matplotlib inline

In [2]:
from sklearn.datasets import fetch_california_housing

In [3]:
df = fetch_california_housing()
df.data, df.target

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

In [4]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df.data, df.target, test_size=0.2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((16512, 8), (16512,), (4128, 8), (4128,))

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [6]:
from sklearn.linear_model import LinearRegression

regression = LinearRegression()
regression.fit(x_train_scaled, y_train)

In [7]:
regression.coef_

array([ 0.82710973,  0.11623587, -0.27046587,  0.31283464, -0.00516329,
       -0.03971732, -0.90468747, -0.87475815])

In [8]:
y_pred_linear = regression.predict(x_test_scaled)
y_pred_linear, y_test

(array([1.76296628, 1.72649176, 1.97372826, ..., 4.84610941, 6.03054125,
        1.44001237]),
 array([1.249  , 3.056  , 1.447  , ..., 5.00001, 5.00001, 1.188  ]))

In [9]:
from sklearn.linear_model import Ridge

# Гребнева регресія
regression_ridge = Ridge()

In [10]:
from sklearn.linear_model import Lasso

regression_lasso = Lasso()

In [11]:
from sklearn.linear_model import PoissonRegressor

regression_poisson = PoissonRegressor()

In [15]:
models = {
    "Linear regression": regression,
    "Ridge regression": regression_ridge,
    "Lasso regression": regression_lasso,
    "Poisson regression": regression_poisson
}

models_results = {
    "Linear regression": [],
    "Ridge regression": [],
    "Lasso regression": [],
    "Poisson regression": []
}

In [16]:
from sklearn.metrics import mean_squared_error, r2_score

for model_name, model in models.items():    
    for n in [2, 4, 8]:
        x_train_sub = x_train_scaled[:, :n]
        x_test_sub = x_test_scaled[:, :n]

        model.fit(x_train_sub, y_train)
        print(f"{model_name} coef. with {n} features: {model.coef_}")
        
        y_pred = model.predict(x_test_sub)
        temp = [mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
        models_results[model_name].append([f"{value:.2f}" for value in temp])
    print()

Linear regression coef. with 2 features: [0.82127022 0.21578345]
Linear regression coef. with 4 features: [ 1.01873627  0.19620202 -0.54702492  0.49485377]
Linear regression coef. with 8 features: [ 0.82710973  0.11623587 -0.27046587  0.31283464 -0.00516329 -0.03971732
 -0.90468747 -0.87475815]

Ridge regression coef. with 2 features: [0.82121811 0.21576401]
Ridge regression coef. with 4 features: [ 1.01844173  0.19620351 -0.54639004  0.4942609 ]
Ridge regression coef. with 8 features: [ 0.827078    0.11631734 -0.27029591  0.31261154 -0.00513417 -0.03972243
 -0.90390477 -0.87396813]

Lasso regression coef. with 2 features: [0. 0.]
Lasso regression coef. with 4 features: [ 0.  0.  0. -0.]
Lasso regression coef. with 8 features: [ 0.  0.  0. -0. -0. -0. -0. -0.]

Poisson regression coef. with 2 features: [0.22737092 0.05478228]
Poisson regression coef. with 4 features: [ 0.22512028  0.0550206   0.00891736 -0.00926883]
Poisson regression coef. with 8 features: [ 0.21990504  0.05287644  0.

In [17]:
models_results

{'Linear regression': [['0.66', '0.51'], ['0.62', '0.53'], ['0.53', '0.60']],
 'Ridge regression': [['0.66', '0.51'], ['0.62', '0.53'], ['0.53', '0.60']],
 'Lasso regression': [['1.33', '-0.00'], ['1.33', '-0.00'], ['1.33', '-0.00']],
 'Poisson regression': [['0.78', '0.41'], ['0.78', '0.41'], ['0.74', '0.44']]}

In [46]:
import pandas as pd

table1_data = {
    'Model': list(models_results.keys()),
    '2 features': [result[0][0] for result in models_results.values()],
    '4 features': [result[1][0] for result in models_results.values()],
    '8 features': [result[2][0] for result in models_results.values()],
}

table1 = pd.DataFrame(table1_data)
table1.set_index('Model', inplace=True)

table2_data = {
    'Model': list(models_results.keys()),
    '2 features': [result[0][1] for result in models_results.values()],
    '4 features': [result[1][1] for result in models_results.values()],
    '8 features': [result[2][1] for result in models_results.values()],
}

table2 = pd.DataFrame(table2_data)
table2.set_index('Model', inplace=True)

In [47]:
table1

Unnamed: 0_level_0,2 features,4 features,8 features
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Linear regression,0.66,0.62,0.53
Ridge regression,0.66,0.62,0.53
Lasso regression,1.33,1.33,1.33
Poisson regression,0.78,0.78,0.74


In [48]:
table2

Unnamed: 0_level_0,2 features,4 features,8 features
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Linear regression,0.51,0.53,0.6
Ridge regression,0.51,0.53,0.6
Lasso regression,-0.0,-0.0,-0.0
Poisson regression,0.41,0.41,0.44
