In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the
# project files stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import joblib
import numpy as np
import pandas as pd
from time import time
from sklearn.metrics import mean_absolute_error,explained_variance_score,max_error,median_absolute_error,r2_score,mean_squared_error

# Read the validation and test splits from the shared drive in one pass.
# The x_* frames are later passed to eval_model as features and the y_*
# frames as labels; files are read in the order the names are listed.
x_val, y_val, x_test, y_test = [
  pd.read_csv("drive/Shareddrives/major_project/data/{}.csv".format(split_name))
  for split_name in ("x_val", "y_val", "x_test", "y_test")
]

In [3]:
# Load each saved estimator into a dict keyed by its short model code.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
models = {
  code: joblib.load("drive/Shareddrives/major_project/data/{}_model.pkl".format(code))
  for code in ['en','et','gb','la','rf','rg']
}

In [4]:
# Display the loaded estimators and their hyperparameters
# (the last expression of a cell is rendered as its output).
models

{'en': ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
            max_iter=1000, normalize=False, positive=False, precompute=False,
            random_state=None, selection='cyclic', tol=0.0001, warm_start=False),
 'et': ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                     max_depth=None, max_features='auto', max_leaf_nodes=None,
                     max_samples=None, min_impurity_decrease=0.0,
                     min_impurity_split=None, min_samples_leaf=1,
                     min_samples_split=2, min_weight_fraction_leaf=0.0,
                     n_estimators=100, n_jobs=None, oob_score=False,
                     random_state=None, verbose=0, warm_start=False),
 'gb': GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                           init=None, learning_rate=0.1, loss='ls', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_im

In [5]:
def eval_model(name,model,features,label):
  """Score a fitted regressor on one data split and print a one-line report.

  Args:
    name: Text printed at the start of the report line to identify the model.
    model: Object exposing ``predict(features)``.
    features: Inputs handed unchanged to ``model.predict``.
    label: Ground-truth targets the predictions are compared against.

  Returns:
    dict with the keys ``MAE``, ``EVS``, ``RMSE``, ``R2`` and ``Latency``
    holding the same values (rounded to 3 decimals) that are printed, so
    callers can gather results into a table. Callers that ignore the return
    value behave exactly as before.
  """
  # perf_counter is monotonic and high-resolution; wall-clock time() can jump
  # and is too coarse for the millisecond-scale predict calls timed here.
  from time import perf_counter

  # Only the predict call is timed; metric computation is excluded.
  start=perf_counter()
  pred=model.predict(features)
  latency=round(perf_counter()-start,3)

  MAE=round(mean_absolute_error(label,pred),3)
  EVS=round(explained_variance_score(label,pred),3)
  R2=round(r2_score(label,pred),3)
  # RMSE is the square root of the mean squared error.
  RMSE=round(np.sqrt(mean_squared_error(label,pred)),3)
  print("{} -- MAE: {}/ EVS: {}/ RMSE: {}/ R2: {}/ Latency: {}s".format(name,MAE,EVS,RMSE,R2,latency))
  return {"MAE":MAE,"EVS":EVS,"RMSE":RMSE,"R2":R2,"Latency":latency}

In [6]:
# Report every loaded model's metrics on the validation split.
for model_code, estimator in models.items():
  eval_model(model_code, estimator, x_val, y_val)

en -- MAE: 281840.064/ EVS: 0.638/ RMSE: 497748.692/ R2: 0.638/ Latency: 0.011s
et -- MAE: 74383.252/ EVS: 0.954/ RMSE: 178239.155/ R2: 0.954/ Latency: 0.064s
gb -- MAE: 94967.379/ EVS: 0.956/ RMSE: 173966.526/ R2: 0.956/ Latency: 0.004s
la -- MAE: 276166.459/ EVS: 0.676/ RMSE: 470994.89/ R2: 0.676/ Latency: 0.001s
rf -- MAE: 73153.633/ EVS: 0.961/ RMSE: 162910.509/ R2: 0.961/ Latency: 0.049s
rg -- MAE: 276352.382/ EVS: 0.676/ RMSE: 470835.199/ R2: 0.676/ Latency: 0.001s


In [7]:
# Load the two additional saved models, keyed by the name used in their file names.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
rf_spec_models = {
  code: joblib.load("drive/Shareddrives/major_project/data/{}_model.pkl".format(code))
  for code in ['rfRandnew','rfGridnew']
}

In [8]:
# Report these models on the test split. The six models in `models` were
# scored on the validation split, so the two sets of numbers come from
# different data.
for model_code, estimator in rf_spec_models.items():
  eval_model(model_code, estimator, x_test, y_test)

rfRandnew -- MAE: 66603.513/ EVS: 0.975/ RMSE: 126745.971/ R2: 0.975/ Latency: 0.079s
rfGridnew -- MAE: 66798.389/ EVS: 0.976/ RMSE: 124001.484/ R2: 0.976/ Latency: 0.057s


In [9]:
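# Disabled leftover: `en_model` is not defined in the cells shown in this notebook,
# so this line would raise NameError if re-enabled as-is.
#joblib.dump(en_model,"drive/Shareddrives/major_project/data/en_model.pkl")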