In [68]:
import pandas as ps
import plotly.express as px
import numpy as np
from scipy.stats import pearsonr
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error

def _load_split(csv_path):
    """Read one split CSV using the python parser engine and a comma separator."""
    return ps.read_csv(csv_path, sep=',', engine='python')


# Inputs (x_*) and targets (y_*) for the train and test splits.
x_train = _load_split('../data/split/x_train.csv')
y_train = _load_split('../data/split/y_train.csv')
x_test = _load_split('../data/split/x_test.csv')
y_test = _load_split('../data/split/y_test.csv')

# Linear regression

In [55]:
# Evaluate the persisted linear-regression model on the test and training splits.
# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model_reg = joblib.load('../models/linear_regression/lr.pkl')

y_test_pred = model_reg.predict(x_test)
y_train_pred = model_reg.predict(x_train)

# Pair the test targets with the model's predictions.
df_prediction = ps.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_test_pred.squeeze()})

# RMSE and RMSLE are both obtained with np.sqrt: the `squared=False` keyword of
# mean_squared_log_error was deprecated in scikit-learn 1.4 and removed in 1.6.
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_rmsle = np.sqrt(mean_squared_log_error(y_test, y_test_pred))

train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
train_rmsle = np.sqrt(mean_squared_log_error(y_train, y_train_pred))

# Label each group so the two sets of numbers cannot be confused.
print('TEST')
print(f'Mean absolute error: {test_mae}')
print(f'Mean squared error: {test_mse}')
print(f'Root mean squared error: {test_rmse}')
print(f'Root mean squared log error: {test_rmsle}')
print('---------------------------------------')
print('TRAINING')
print(f'Mean absolute error: {train_mae}')
print(f'Mean squared error: {train_mse}')
print(f'Root mean squared error: {train_rmse}')
print(f'Root mean squared log error: {train_rmsle}')



Mean absolute error: 15492.116617070778
Mean squared error: 483293745.98276067
Root mean squared error: 21983.942912561448
Mean squared log error: 0.125777753870742
---------------------------------------
Mean absolute error: 11318.321082832128
Mean squared error: 312991146.22960883
Root mean squared error: 17691.555788839174
Mean squared log error: 0.0900361928495771


# Random Forest

In [71]:
# Evaluate the persisted random-forest model on the test and training splits.
# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model_rf = joblib.load('../models/random_forest/rf.pkl')

y_test_pred = model_rf.predict(x_test)
y_train_pred = model_rf.predict(x_train)

# Pair the test targets with the model's predictions.
df_prediction = ps.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_test_pred.squeeze()})

# RMSE and RMSLE are both obtained with np.sqrt: the `squared=False` keyword of
# mean_squared_log_error was deprecated in scikit-learn 1.4 and removed in 1.6.
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_rmsle = np.sqrt(mean_squared_log_error(y_test, y_test_pred))

train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
train_rmsle = np.sqrt(mean_squared_log_error(y_train, y_train_pred))

# Label each group so the two sets of numbers cannot be confused.
print('TEST')
print(f'Mean absolute error: {test_mae}')
print(f'Mean squared error: {test_mse}')
print(f'Root mean squared error: {test_rmse}')
print(f'Root mean squared log error: {test_rmsle}')
print('---------------------------------------')
print('TRAINING')
print(f'Mean absolute error: {train_mae}')
print(f'Mean squared error: {train_mse}')
print(f'Root mean squared error: {train_rmse}')
print(f'Root mean squared log error: {train_rmsle}')

Mean absolute error: 5945.30303030303
Mean squared error: 286254250.94490355
Root mean squared error: 16919.049942148158
Mean squared log error: 0.08060636525358057
---------------------------------------
Mean absolute error: 6435.369244935543
Mean squared error: 391970102.9843462
Root mean squared error: 19798.234845166025
Mean squared log error: 0.10414105925176978


## Random Forest - Hyperparameter Tuning

In [70]:
# Evaluate the persisted tuned random-forest model on the test and training splits.
# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model_rf_tuning = joblib.load('../models/random_forest/rf_tuning.pkl')

y_test_pred = model_rf_tuning.predict(x_test)
y_train_pred = model_rf_tuning.predict(x_train)

# Pair the test targets with the model's predictions.
df_prediction = ps.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_test_pred.squeeze()})

# RMSE and RMSLE are both obtained with np.sqrt: the `squared=False` keyword of
# mean_squared_log_error was deprecated in scikit-learn 1.4 and removed in 1.6.
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_rmsle = np.sqrt(mean_squared_log_error(y_test, y_test_pred))

train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
train_rmsle = np.sqrt(mean_squared_log_error(y_train, y_train_pred))

print('TEST')
print(f'Mean absolute error: {test_mae}')
print(f'Mean squared error: {test_mse}')
print(f'Root mean squared error: {test_rmse}')
print(f'Root mean squared log error: {test_rmsle}')
print('---------------------------------------')
print('TRAINING')
print(f'Mean absolute error: {train_mae}')
print(f'Mean squared error: {train_mse}')
print(f'Root mean squared error: {train_rmse}')
print(f'Root mean squared log error: {train_rmsle}')


TEST
Mean absolute error: 5619.699724517906
Mean squared error: 261788272.80716252
Root mean squared error: 16179.87245954561
Mean squared log error: 0.08186557510051727
---------------------------------------
TRAINING
Mean absolute error: 6189.153775322284
Mean squared error: 459874792.51289135
Root mean squared error: 21444.69147628129
Mean squared log error: 0.094191377835999


# Support Vector Regression

In [69]:
# Evaluate the persisted support-vector-regression model on the test and training splits.
# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model_svr_tuning = joblib.load('../models/support_vector_regression/svr.pkl')

y_test_pred = model_svr_tuning.predict(x_test)
y_train_pred = model_svr_tuning.predict(x_train)

# Pair the test targets with the model's predictions.
df_prediction = ps.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_test_pred.squeeze()})

# RMSE and RMSLE are both obtained with np.sqrt: the `squared=False` keyword of
# mean_squared_log_error was deprecated in scikit-learn 1.4 and removed in 1.6.
test_mae = mean_absolute_error(y_test, y_test_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_rmse = np.sqrt(test_mse)
test_rmsle = np.sqrt(mean_squared_log_error(y_test, y_test_pred))

train_mae = mean_absolute_error(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
train_rmsle = np.sqrt(mean_squared_log_error(y_train, y_train_pred))

# Label each group so the two sets of numbers cannot be confused.
print('TEST')
print(f'Mean absolute error: {test_mae}')
print(f'Mean squared error: {test_mse}')
print(f'Root mean squared error: {test_rmse}')
print(f'Root mean squared log error: {test_rmsle}')
print('---------------------------------------')
print('TRAINING')
print(f'Mean absolute error: {train_mae}')
print(f'Mean squared error: {train_mse}')
print(f'Root mean squared error: {train_rmse}')
print(f'Root mean squared log error: {train_rmsle}')

Mean absolute error: 24529.584557590915
Mean squared error: 1405822521.4877906
Root mean squared error: 37494.299853281576
Mean squared log error: 0.19080767309769217
---------------------------------------
Mean absolute error: 22324.014118300445
Mean squared error: 1696601564.0005884
Root mean squared error: 41189.823549034394
Mean squared log error: 0.18218812370723766
