# Summary Metrics of Modeling

In [132]:
import math
import csv
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error, mean_absolute_error
import statsmodels.api as sm
from sklearn.metrics import r2_score

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, LassoCV, Ridge
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

%matplotlib inline

### Import libraries

In [133]:
with open('../data/lasso.pkl', 'rb') as f:
    lm = pickle.load(f)

In [134]:
with open('../data/ridge.pkl', 'rb') as f:
    lm2 = pickle.load(f)

In [135]:
with open('../data/scaler.pkl', 'rb') as f:
    sc = pickle.load(f)

In [136]:
with open('../data/lin_reg.pkl', 'rb') as f:
    lr = pickle.load(f)

### Import models and data

In [137]:
X_train = pd.read_csv('../data/X_train_dum_sc.csv', header=None)
X_test = pd.read_csv('../data/X_test_dum_sc.csv', header=None)
y_train = pd.read_csv('../data/y_train.csv', header=None)
y_test = pd.read_csv('../data/y_test.csv', header=None)

### Calculate regression metrics for linear-regression model, including model-score/R^2, mean-squared error, root-mean-squared error, mean-absolute error, median-absolute error.

In [138]:
lr = LinearRegression()
model_lr = lr.fit(X_train,y_train)
score1 = model_lr.score(X_train,y_train)
y_hat_lr = model_lr.predict(X_train)
mse1=mean_squared_error(y_train, y_hat_lr)
rmse1=np.sqrt(mean_squared_error(y_train, y_hat_lr))
meae1 = mean_absolute_error(y_train, y_hat_lr)
mdae1 = median_absolute_error(y_train, y_hat_lr)
X_train['y_hat_lr'] = lr.predict(X_train)
r2_1 = r2_score(y_train, y_hat_lr)

### Calculate regression metrics for regression model with Lasso-regularization, including model-score/R^2, mean-squared error, root-mean-squared error, mean-absolute error, median-absolute error.

In [139]:
lm1 = Lasso()
model_lm1 = lm1.fit(X_train,y_train)
score2 = model_lm1.score(X_train,y_train)
y_hat_lm1 = model_lm1.predict(X_train)
mse2=mean_squared_error(y_train, y_hat_lm1)
rmse2=np.sqrt(mean_squared_error(y_train, y_hat_lm1))
meae2 = mean_absolute_error(y_train, y_hat_lm1)
mdae2 = median_absolute_error(y_train, y_hat_lm1)
X_train['y_hat_lm1'] = lm1.predict(X_train)
r2_2 = r2_score(y_train, y_hat_lm1)



### Calculate regression metrics for regression model with Ridge regularization, including model-score/R^2, mean-squared error, root-mean-squared error, mean-absolute error, median-absolute error.

In [140]:
lm2 = Ridge()
model_lm2 = lm2.fit(X_train,y_train)
score3 = model_lm2.score(X_train,y_train)
y_hat_lm2 = model_lm2.predict(X_train)
mse3=mean_squared_error(y_train, y_hat_lm2)
rmse3=np.sqrt(mean_squared_error(y_train, y_hat_lm2))
meae3 = mean_absolute_error(y_train, y_hat_lm2)
mdae3 = median_absolute_error(y_train, y_hat_lm2)
X_train['y_hat_lm2'] = lm2.predict(X_train)
r2_3 = r2_score(y_train, y_hat_lm2)

# Summary Metrics Table

In [141]:
    print('| metric | LinReg | Lasso_prod| Ridge_prod|\n| --- | --- | --- | --- |')
    print(f'| {"score"} | {score1} | {score2} | {score3} |')
    print(f'| {"MSE"} | {mse1} | {mse2} | {mse3} |')
    print(f'| {"RMSE"} | {rmse1} | {rmse2} | {rmse3} |')
    print(f'| {"mean-AE"} | {meae1} | {meae2} | {meae3} |')
    print(f'| {"medn-AE"} | {mdae1} | {mdae2} | {mdae3} |')
    print(f'| {"r^2"} | {r2_1} | {r2_2} | {r2_3} |')

| metric | LinReg | Lasso_prod| Ridge_prod|
| --- | --- | --- | --- |
| score | 0.9441678745346292 | 0.9441865204471793 | 0.9442623563732895 |
| MSE | 350195921.98677963 | 350078969.2022968 | 349603303.4123507 |
| RMSE | 18713.522436644034 | 18710.397355542635 | 18697.681765725683 |
| mean-AE | 12831.536928371226 | 12810.758110519813 | 12805.973867902978 |
| medn-AE | 9302.135911006277 | 9309.497258766976 | 9242.662263545571 |
| r^2 | 0.9441678745346292 | 0.9441865204471793 | 0.9442623563732895 |


| metric | LinReg | Lasso_prod| Ridge_prod|
| --- | --- | --- | --- |
| score | 0.9441678745346292 | 0.9441865204471793 | 0.9442623563732895 |
| MSE | 350195921.98677963 | 350078969.2022968 | 349603303.4123507 |
| RMSE | 18713.522436644034 | 18710.397355542635 | 18697.681765725683 |
| mean-AE | 12831.536928371226 | 12810.758110519813 | 12805.973867902978 |
| medn-AE | 9302.135911006277 | 9309.497258766976 | 9242.662263545571 |
| r^2 | 0.9441678745346292 | 0.9441865204471793 | 0.9442623563732895 |

### Interpretation:
#### Comparing the metrics for the three models side-by-side, one might conclude that the Ridge model has the lowest error and highest score/R^2-value.

# Please continue to Notebook-5.