In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Исходные данные
data = pd.DataFrame({
    'Area': [50, 60, 80, 100, 120],
    'Rooms': [2, 2, 3, 4, 4],
    'Age': [5, 10, 15, 20, 25],
    'Price': [200, 250, 300, 400, 450]
})

# Разделение данных на признаки и целевую переменную
X = data[['Area', 'Rooms', 'Age']]
y = data['Price']

# Масштабирование данных (полиномиальные признаки чувствительны к масштабу)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [21]:
from sklearn.preprocessing import PolynomialFeatures

# Создание полиномиальных признаков
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Имена новых признаков
poly_feature_names = poly.get_feature_names_out(X.columns)
X_train_poly = pd.DataFrame(X_train_poly, columns=poly_feature_names)
X_test_poly = pd.DataFrame(X_test_poly, columns=poly_feature_names)

print("Полиномиальные признаки:\n", X_train_poly.head())

Полиномиальные признаки:
        Area     Rooms       Age    Area^2  Area Rooms  Area Age  Rooms^2  \
0  1.483651  1.118034  1.414214  2.201220    1.658772  2.098199     1.25   
1 -0.078087  0.000000  0.000000  0.006098   -0.000000 -0.000000     0.00   
2 -1.249390 -1.118034 -1.414214  1.560976    1.396861  1.766904     1.25   
3  0.702782  1.118034  0.707107  0.493902    0.785734  0.496942     1.25   

   Rooms Age  Age^2  
0   1.581139    2.0  
1   0.000000    0.0  
2   1.581139    2.0  
3   0.790569    0.5  


In [22]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Линейная регрессия на полиномиальных признаках
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Оценка на тестовых данных
y_pred = model.predict(X_test_poly)
mse_poly = mean_squared_error(y_test, y_pred)
print("MSE на полиномиальных признаках:", mse_poly)

MSE на полиномиальных признаках: 222.8990639353712


In [18]:
from sklearn.linear_model import Lasso

# Lasso для отбора признаков
lasso = Lasso(alpha=0.12, random_state=42)
lasso.fit(X_train_poly, y_train)

# Отбор значимых признаков
selected_features = poly_feature_names[lasso.coef_ != 0]
X_train_poly_selected = X_train_poly[selected_features]
X_test_poly_selected = X_test_poly[selected_features]

print("Отобранные признаки:", selected_features)

Отобранные признаки: ['Area' 'Rooms' 'Rooms^2']


In [19]:
# Линейная регрессия на отобранных признаках
model_selected = LinearRegression()
model_selected.fit(X_train_poly_selected, y_train)

# Оценка на тестовых данных
y_pred_selected = model_selected.predict(X_test_poly_selected)
mse_selected = mean_squared_error(y_test, y_pred_selected)
print("MSE после отбора признаков:", mse_selected)

MSE после отбора признаков: 625.0


In [23]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=5)
X_train_rfe = rfe.fit_transform(X_train_poly, y_train)
X_test_rfe = rfe.transform(X_test_poly)

selected_features = poly_feature_names[rfe.support_]
print("Отобранные признаки (RFE):", selected_features)

Отобранные признаки (RFE): ['Area' 'Rooms' 'Age' 'Area Rooms' 'Rooms^2']


In [None]:
# Линейная регрессия на отобранных признаках
model_selected = LinearRegression()
model_selected.fit(X_train_poly_selected, y_train)

# Оценка на тестовых данных
y_pred_selected = model_selected.predict(X_test_poly_selected)
mse_selected = mean_squared_error(y_test, y_pred_selected)
print("MSE после отбора признаков:", mse_selected)