In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score 

In [8]:
# Load the train/test feature tables and their targets from ../data/processed.
# squeeze("columns") turns a single-column target frame into a Series; unlike a
# bare squeeze() it never collapses a one-row file all the way down to a scalar.
X_train = pd.read_csv("../data/processed/X_train.csv")
y_train = pd.read_csv("../data/processed/y_train.csv").squeeze("columns")

X_test = pd.read_csv("../data/processed/X_test.csv")
y_test = pd.read_csv("../data/processed/y_test.csv").squeeze("columns")

# Fail fast here rather than deep inside a model's fit/predict call.
assert len(X_train) == len(y_train), "X_train / y_train row counts differ"
assert len(X_test) == len(y_test), "X_test / y_test row counts differ"
assert list(X_train.columns) == list(X_test.columns), "train/test feature columns differ"

In [4]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline: fit on the training split, then report the
# learned parameters and the hold-out RMSE / R2 on the test split.
lr_model = LinearRegression(
    fit_intercept=True,
    copy_X=True,
    n_jobs=-1,
)

lr_model.fit(X_train, y_train)

# Learned parameters (label spelling corrected from "coeffients").
print("coefficients :", lr_model.coef_)
print("intercept :", lr_model.intercept_)

# Predictions on the test features; reused by the metric calls below.
y_pred_lr = lr_model.predict(X_test)

print("first 10 predictions :", y_pred_lr[:10])
print("RMSE score :", root_mean_squared_error(y_test, y_pred_lr))
print("R2 :", r2_score(y_test, y_pred_lr))

coeffients : [[ 7.21210766e-01  3.86312742e-01 -1.08484178e-01 -5.25313301e-02
   2.85127361e-01 -4.51906139e-01  5.28704142e+00  9.02795183e-01
   1.26617830e-01 -3.86986728e-01 -1.44793961e-02 -5.81130255e+00
  -1.75127902e+02 -6.49224985e+00  1.15502124e+00  5.70626997e+00
  -8.46656882e-02]]
intercept : [13346.71524879]
first 10 predictions : [[546.50057618]
 [499.7209888 ]
 [383.43373548]
 [366.55174284]
 [291.13137725]
 [274.63567974]
 [220.00664962]
 [202.22133154]
 [176.94822014]
 [242.83815979]]
RMSE score : 31.77919977323049
R2 : 0.8763290420369362


In [5]:
from sklearn.linear_model import Ridge

# Ridge regression (alpha=1.0) trained on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
rr_model = Ridge(alpha=1.0, random_state=42).fit(X_train, y_train)

# Score the test split and report its root-mean-squared error.
y_pred_rr = rr_model.predict(X_test)
print(
    "RMSE :",
    root_mean_squared_error(y_test, y_pred_rr),
)

RMSE : 31.77800433092676


In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for gradient boosting.
# 4 * 3 * 6 * 3 = 216 candidates; with cv=5 that is 1080 fits plus the final
# refit, so expect this cell to be by far the slowest in the notebook.
param = {
    'n_estimators': [100, 200, 300, 400],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 9, 11, 13],
    'subsample': [0.5, 0.7, 0.9],
}

grid = GridSearchCV(
    # subsample < 1 makes each boosting stage draw a random row subset, so the
    # estimator is seeded to keep the search reproducible between runs.
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param,
    # Select on the same metric that is reported below; without this the search
    # ranks candidates by the estimator's default score (R2), not RMSE.
    scoring="neg_root_mean_squared_error",
    cv=5,
    n_jobs=-1,
)

grid.fit(X_train, y_train)

# predict() delegates to the best estimator, refit on the full training split.
y_pred_gb = grid.predict(X_test)

print("best parameters :", grid.best_params_)
# best_score_ is the negated RMSE under this scoring, so flip the sign.
print("CV RMSE :", -grid.best_score_)
print("RMSE :", root_mean_squared_error(y_test, y_pred_gb))