# 1. 선형 회귀

In [None]:
import pandas as pd

# Read the rider dataset (path is relative to the notebook's working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="../data/rider_data.csv")
df.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Split predictors (every column except "Time_min") and the "Time_min" target
# into train and test parts; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="Time_min"),
    df["Time_min"],
    random_state=3333,
)

# Print the shape of each part in the order: X_train, X_test, y_train, y_test.
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)

In [None]:
# Plain linear regression fitted on the training split.
model = LinearRegression()

# Kept as the cell's trailing expression so the notebook displays the estimator.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out test features; the trailing expression
# displays the prediction array in the notebook.
pred = model.predict(X=X_test)
pred

In [None]:
# Mean absolute error of the test-set predictions; displayed as the cell output.
mae = mean_absolute_error(y_true=y_test, y_pred=pred)
mae

In [None]:
# Tabulate each fitted coefficient next to the feature it belongs to.
# The names are taken from X_train.columns instead of a hand-typed list so the
# pairing always follows the column order the model was fitted on, and the
# table does not break (or silently mislabel) if the dataset's columns change.
coef_df = pd.DataFrame({
    "feature": X_train.columns,
    "coef": model.coef_,
})
print(coef_df)


In [None]:
# Intercept of the currently fitted `model`.
intercept = model.intercept_
print("절편: ", intercept)

In [None]:
# Evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the test-set predictions with four regression metrics.
mae = mean_absolute_error(y_true=y_test, y_pred=pred)
mse = mean_squared_error(y_true=y_test, y_pred=pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE
r2 = r2_score(y_true=y_test, y_pred=pred)

print(f"MAE : {mae}\nMSE: {mse}\nRMSE : {rmse}\nR2: {r2}")


In [None]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV  # in practice: Optuna

# Compare two regularized linear models, tuning the penalty strength `alpha`
# of each with a 5-fold cross-validated grid search on the training split.
r_model = Ridge()  # L2 regularization: the "2" refers to the squared penalty
l_model = Lasso()  # L1 regularization: absolute-value penalty

# Both searches share the same alpha candidates.
r_param = {"alpha": [0.001, 0.01, 0.1, 1, 10, 100]}
r_grid = GridSearchCV(r_model, param_grid=r_param, cv=5)
l_grid = GridSearchCV(l_model, param_grid=r_param, cv=5)

# --- Ridge ---
r_grid.fit(X_train, y_train)

# Evaluate the best Ridge estimator on the held-out test split. Results are
# kept in r_* names so the Lasso section below does not overwrite them.
r_pred = r_grid.best_estimator_.predict(X_test)
r_mae = mean_absolute_error(y_test, r_pred)
r_r2 = r2_score(y_test, r_pred)

print("Ridge param(alpha)", r_grid.best_params_)
print("Ridge score(alpha)", r_grid.best_score_)  # cross-validation score of the best alpha
print("Ridge mae", r_mae)
print("Ridge r2", r_r2)

print("="*50)

# --- Lasso ---
l_grid.fit(X_train, y_train)
l_pred = l_grid.best_estimator_.predict(X_test)
l_mae = mean_absolute_error(y_test, l_pred)
l_r2 = r2_score(y_test, l_pred)

# These lines report the Lasso search, so they are labelled "Lasso"
# (they were previously printed under a "Ridge" label).
print("Lasso param(alpha)", l_grid.best_params_)
print("Lasso score(alpha)", l_grid.best_score_)  # cross-validation score of the best alpha
print("Lasso mae", l_mae)
print("Lasso r2", l_r2)


# 2. 다항 회귀

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the inputs with degree-2 polynomial terms; include_bias=False leaves
# out the constant column.
poly_model = PolynomialFeatures(degree=2, include_bias=False)

print(X_train.shape)

# Fit the expansion on the training split only, then apply it to both splits.
poly_model.fit(X_train)
X_train_poly = poly_model.transform(X_train)
X_test_poly = poly_model.transform(X_test)

# NOTE: `model`, `pred` and `r2` are rebound here to the polynomial-regression
# results.
model = LinearRegression().fit(X_train_poly, y_train)

pred = model.predict(X_test_poly)
r2 = r2_score(y_true=y_test, y_pred=pred)


In [None]:
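# Degree-2 expansion of 4 inputs (a, b, c, d) yields 14 features; scikit-learn emits them in this order:
# a, b, c, d, a^2, ab, ac, ad, b^2, bc, bd, c^2, cd, d^2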