In [113]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [114]:
data = pd.read_csv('lr.csv')
data

Unnamed: 0,sale_price,retail_price,shoe_size,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,...,West Virginia,Wisconsin,Wyoming,Off-White,order_day,order_month,order_year,release_day,release_month,release_year
0,1097.0,220.0,11.0,0,0,0,1,0,0,0,...,0,0,0,0,1,9,2017,24,9,2016
1,685.0,220.0,11.0,0,0,0,1,0,0,0,...,0,0,0,0,1,9,2017,23,11,2016
2,690.0,220.0,11.0,0,0,0,1,0,0,0,...,0,0,0,0,1,9,2017,23,11,2016
3,1075.0,220.0,11.5,0,0,0,0,0,0,0,...,0,0,0,0,1,9,2017,23,11,2016
4,828.0,220.0,11.0,0,0,0,0,0,0,0,...,0,0,0,0,1,9,2017,11,2,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99951,565.0,220.0,8.0,0,0,0,0,0,0,0,...,0,0,0,0,13,2,2019,26,12,2018
99952,598.0,220.0,8.5,0,0,0,1,0,0,0,...,0,0,0,0,13,2,2019,26,12,2018
99953,605.0,220.0,5.5,0,0,0,0,0,0,0,...,0,0,0,0,13,2,2019,26,12,2018
99954,650.0,220.0,11.0,0,0,0,1,0,0,0,...,0,0,0,0,13,2,2019,26,12,2018


In [115]:
X = data.drop(columns='sale_price')
y = data.sale_price

In [116]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

In [117]:
pipeline = Pipeline([
    ("poly", PolynomialFeatures(degree=2, include_bias=False)),
    ('scaler', MinMaxScaler()),
    ('model', LinearRegression())
])

pipeline.fit(X_train, y_train)
score = pipeline.score(X_test, y_test)
score

0.7135469936477266

In [118]:
predict = pipeline.predict(X_test)
predict

array([300.39700292, 601.94978284, 318.63420463, ..., 348.09669042,
       365.59829615, 553.16548046], shape=(19992,))

In [119]:


mse = mean_squared_error(y_test, predict)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, predict)
r2 = r2_score(y_test, predict)

print("MSE :", mse)
print("RMSE:", rmse)
print("MAE :", mae)
print("R²  :", r2)

MSE : 18521.442282425596
RMSE: 136.09350565851992
MAE : 80.78702388045366
R²  : 0.7135469936477266


In [120]:
from numpy import sqrt

print(sqrt(18521))

136.09188072769072
