In [1]:
%matplotlib inline

In [2]:
from sklearn.datasets import fetch_california_housing

In [3]:
df = fetch_california_housing()
df.data, df.target

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

In [4]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df.data, df.target, test_size=0.2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((16512, 8), (16512,), (4128, 8), (4128,))

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [6]:
from sklearn.linear_model import LinearRegression

regression = LinearRegression()
regression.fit(x_train_scaled, y_train)

In [7]:
regression.coef_

array([ 0.82883967,  0.11985841, -0.2811711 ,  0.32508396, -0.0032564 ,
       -0.04128723, -0.89581413, -0.8656952 ])

In [8]:
y_pred_linear = regression.predict(x_test_scaled)
y_pred_linear, y_test

(array([2.16514447, 0.94870051, 1.62664135, ..., 2.27058505, 2.48054228,
        1.89009559]),
 array([5.00001, 1.1    , 1.332  , ..., 2.298  , 2.227  , 2.169  ]))

In [9]:
from sklearn.linear_model import Ridge

# Гребнева регресія
regression_ridge = Ridge()
regression_ridge.fit(x_train_scaled, y_train)

In [10]:
from sklearn.linear_model import Lasso

regression_lasso = Lasso()
regression_lasso.fit(x_train_scaled, y_train)

In [11]:
from sklearn.linear_model import PoissonRegressor

regression_poisson = PoissonRegressor()
regression_poisson.fit(x_train_scaled, y_train)

In [17]:
models = {
    "Linear regression": regression,
    "Ridge regression": regression_ridge,
    "Lasso regression": regression_lasso,
    "Poisson regression": regression_poisson
}

models_results = {}

In [18]:
from sklearn.metrics import mean_squared_error, r2_score

for model_name, model in models.items():
    for n in [2, 4, 8]:
        x_train_sub = x_train_scaled[:, :n]
        x_test_sub = x_test_scaled[:, :n]
        
        print(f"{model_name} coef. with {n} features: {model.coef_}")
        
        y_pred = model.predict(x_test_sub)
        temp = [mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
        models_results[model_name].append([f"{value:.2f}" for value in temp])

Linear regression coef. with 2 features: [ 0.82883967  0.11985841 -0.2811711   0.32508396 -0.0032564  -0.04128723
 -0.89581413 -0.8656952 ]


ValueError: X has 2 features, but LinearRegression is expecting 8 features as input.

In [19]:
models_results

{}