In [1]:
import sqlite3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 
from IPython.display import display
from sklearn import tree
from sklearn.manifold import TSNE
from sklearn import svm
from sklearn.svm import SVC 
from sklearn import linear_model
import joblib 
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.model_selection import RandomizedSearchCV

In [2]:
final_df = pd.read_pickle('pickle/firmsxn_24JAN.pkl')
final_df.head()

Unnamed: 0,LATITUDE,LONGITUDE,ACQ_TIME,CONFIDENCE,DAYNIGHT,FRP,STATE,ACQ_YEAR,ACQ_MONTH,STATE_PRCNT_FOREST,AVG_TEMP,AVG_PREC
22,17.20519,80.0962,1,6,1,1,33,2022,1,18.36,27.1,71.44
23,17.20362,80.10623,1,7,1,2,33,2022,1,18.36,27.1,71.44
24,17.61481,76.94758,1,7,1,2,26,2022,1,20.11,22.9,132.51
28,17.19704,78.1349,1,6,1,2,33,2022,1,18.36,27.1,71.44
29,17.20525,77.1786,1,7,1,2,26,2022,1,20.11,22.9,132.51


In [5]:
#Breaking down data into train and test 
y = final_df['FRP'].values
x = final_df.drop(['FRP'], axis = 1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

In [7]:
x_train, x_cv, y_train, y_cv = train_test_split(x_train, y_train, test_size=0.33)

In [8]:
x_train.shape

(30, 11)

In [9]:
x_cv.shape

(16, 11)

In [10]:
y_train.shape

(30,)

In [11]:
y_cv.shape

(16,)

In [12]:
x_train = x_train.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
x_cv = x_cv.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [13]:
#Hyperparam tuning 2 for KNN
#Defining range for parameters
#Since neighbors = 20 and leafsize = 30 were the best hyperparams, going around these values to check for even better parameters
neighbors = [20, 25, 30]
leafSize = [30,40]

knn_model = KNeighborsClassifier(weights='distance')
parameters = {'n_neighbors': neighbors, 'leaf_size': leafSize}

hyperParam_Clf1 = RandomizedSearchCV(knn_model, parameters, cv = 3, scoring = 'accuracy',return_train_score= True)
hyperParam_Clf1.fit(x_cv, y_cv)

Traceback (most recent call last):
  File "c:\users\pratik temkar\appdata\local\programs\python\python38\lib\site-packages\sklearn\model_selection\_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "c:\users\pratik temkar\appdata\local\programs\python\python38\lib\site-packages\sklearn\metrics\_scorer.py", line 216, in __call__
    return self._score(
  File "c:\users\pratik temkar\appdata\local\programs\python\python38\lib\site-packages\sklearn\metrics\_scorer.py", line 258, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "c:\users\pratik temkar\appdata\local\programs\python\python38\lib\site-packages\sklearn\metrics\_scorer.py", line 68, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "c:\users\pratik temkar\appdata\local\programs\python\python38\lib\site-packages\sklearn\neighbors\_classification.py", line 214, in predict
    neigh_dist, neigh_ind = self.kneighbors(X)
  File "c:\users\pratik t

RandomizedSearchCV(cv=3, estimator=KNeighborsClassifier(weights='distance'),
                   param_distributions={'leaf_size': [30, 40],
                                        'n_neighbors': [20, 25, 30]},
                   return_train_score=True, scoring='accuracy')

In [14]:
print(hyperParam_Clf1.best_params_)

{'n_neighbors': 20, 'leaf_size': 30}


In [15]:
knnModel = KNeighborsClassifier(n_neighbors = 30, leaf_size = 40).fit(x_train, y_train) 

In [16]:
#Saving this Model for final Analysis
joblib.dump(knnModel, 'pickle/knnpredictModel.pkl')

['pickle/knnpredictModel.pkl']

In [17]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [18]:
x_test = x_test.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [19]:
knn_predictions = knnModel.predict(x_test)  

#Fining accuracy of the Model
accuracy_train = knnModel.score(x_train, y_train) 
accuracy_test = knnModel.score(x_test, y_test) 

print('Train Data Accuracy is :',accuracy_train)
print('Test Data Accuracy is :',accuracy_test)

#Finding MAE
knn_MAE = mean_absolute_error(y_test, knn_predictions)
print('MAE value is: ', knn_MAE)

#Finding MAPE
knn_MAPE = mean_absolute_percentage_error(y_test, knn_predictions)
print('MAPE value is: ', knn_MAPE)

Train Data Accuracy is : 0.43333333333333335
Test Data Accuracy is : 0.375
MAE value is:  1.3333333333333333
MAPE value is:  28.740079365079364


In [21]:
#Trying more tuning for random Forest Model
maxdepth = [20, 40, 60]
estimators = [90, 100, 110]

rf_model = RandomForestClassifier(class_weight='balanced', verbose = 50)
parameters = {'max_depth': maxdepth, 'n_estimators':estimators}

hyperParam_Clf2 = RandomizedSearchCV(rf_model, parameters, cv = 3, scoring = 'accuracy',return_train_score= True)
hyperParam_Clf2.fit(x_cv, y_cv)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 90
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 90
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 90
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 90
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 90
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 90
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 90
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 90
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 90
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
b

building tree 56 of 90
building tree 57 of 90
building tree 58 of 90
building tree 59 of 90
building tree 60 of 90
building tree 61 of 90
building tree 62 of 90
building tree 63 of 90
building tree 64 of 90
building tree 65 of 90
building tree 66 of 90
building tree 67 of 90
building tree 68 of 90
building tree 69 of 90
building tree 70 of 90
building tree 71 of 90
building tree 72 of 90
building tree 73 of 90
building tree 74 of 90
building tree 75 of 90
building tree 76 of 90
building tree 77 of 90
building tree 78 of 90
building tree 79 of 90
building tree 80 of 90
building tree 81 of 90
building tree 82 of 90
building tree 83 of 90
building tree 84 of 90
building tree 85 of 90
building tree 86 of 90
building tree 87 of 90
building tree 88 of 90
building tree 89 of 90
building tree 90 of 90
[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:    0.0s finished
Using ThreadingBackend as joblib.Parallel backend instead of LokyBackend as the latter does not provide shared memory semanti

[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  39 out of  3

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 100
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 100
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 100
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 100
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 100
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 100
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 100
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 100
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 100
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.0s remaining:    0.0s
building tree 39 of 100
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    0.0s remaining:    0.0s
building tree 40 of 100
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.0s remaining:    0.0s
building tree 41 of 100
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    0.0s remaining:    0.0s
building tree 42 of 100
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s remaining:    0.0s
building tree 43 of 100
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:    0.0s remaining:    0.0s
building tree 44 of 100
[Parallel(n_jobs=1)]: Done  44 out of  44 | elapsed:    0.0s remaining:    0.0s
building tree 45 of 100
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s remaining:    0.0s
building tree 46 of 100
[Parallel(n_jobs=1)]: Done  46 out of  46 | elapsed:    0.0s remaining:    0.0s
building tree 47 of 100
[Parallel(n_jobs=1)]: Done  47 out of  47 | elapsed:    0.0s rem

building tree 67 of 110
building tree 68 of 110
building tree 69 of 110
building tree 70 of 110
building tree 71 of 110
building tree 72 of 110
building tree 73 of 110
building tree 74 of 110
building tree 75 of 110
building tree 76 of 110
building tree 77 of 110
building tree 78 of 110
building tree 79 of 110
building tree 80 of 110
building tree 81 of 110
building tree 82 of 110
building tree 83 of 110
building tree 84 of 110
building tree 85 of 110
building tree 86 of 110
building tree 87 of 110
building tree 88 of 110
building tree 89 of 110
building tree 90 of 110
building tree 91 of 110
building tree 92 of 110
building tree 93 of 110
building tree 94 of 110
building tree 95 of 110
building tree 96 of 110
building tree 97 of 110
building tree 98 of 110
building tree 99 of 110
building tree 100 of 110
building tree 101 of 110
building tree 102 of 110
building tree 103 of 110
building tree 104 of 110
building tree 105 of 110
building tree 106 of 110
building tree 107 of 110
building

building tree 75 of 110
building tree 76 of 110
building tree 77 of 110
building tree 78 of 110
building tree 79 of 110
building tree 80 of 110
building tree 81 of 110
building tree 82 of 110
building tree 83 of 110
building tree 84 of 110
building tree 85 of 110
building tree 86 of 110
building tree 87 of 110
building tree 88 of 110
building tree 89 of 110
building tree 90 of 110
building tree 91 of 110
building tree 92 of 110
building tree 93 of 110
building tree 94 of 110
building tree 95 of 110
building tree 96 of 110
building tree 97 of 110
building tree 98 of 110
building tree 99 of 110
building tree 100 of 110
building tree 101 of 110
building tree 102 of 110
building tree 103 of 110
building tree 104 of 110
building tree 105 of 110
building tree 106 of 110
building tree 107 of 110
building tree 108 of 110
building tree 109 of 110
building tree 110 of 110
[Parallel(n_jobs=1)]: Done 110 out of 110 | elapsed:    0.0s finished
Using ThreadingBackend as joblib.Parallel backend inste

[Parallel(n_jobs=1)]: Done  23 out of  23 | elapsed:    0.0s remaining:    0.0s
building tree 24 of 110
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    0.0s remaining:    0.0s
building tree 25 of 110
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.0s remaining:    0.0s
building tree 26 of 110
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.0s remaining:    0.0s
building tree 27 of 110
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.0s remaining:    0.0s
building tree 28 of 110
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.0s remaining:    0.0s
building tree 29 of 110
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.0s remaining:    0.0s
building tree 30 of 110
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.0s remaining:    0.0s
building tree 31 of 110
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    0.0s remaining:    0.0s
building tree 32 of 110
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.0s rem

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 90
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 90
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 90
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 90
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 90
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 90
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 90
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 90
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
building tree 10 of 90
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
b

building tree 59 of 90
building tree 60 of 90
building tree 61 of 90
building tree 62 of 90
building tree 63 of 90
building tree 64 of 90
building tree 65 of 90
building tree 66 of 90
building tree 67 of 90
building tree 68 of 90
building tree 69 of 90
building tree 70 of 90
building tree 71 of 90
building tree 72 of 90
building tree 73 of 90
building tree 74 of 90
building tree 75 of 90
building tree 76 of 90
building tree 77 of 90
building tree 78 of 90
building tree 79 of 90
building tree 80 of 90
building tree 81 of 90
building tree 82 of 90
building tree 83 of 90
building tree 84 of 90
building tree 85 of 90
building tree 86 of 90
building tree 87 of 90
building tree 88 of 90
building tree 89 of 90
building tree 90 of 90
[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:    0.0s finished
Using ThreadingBackend as joblib.Parallel backend instead of LokyBackend as the latter does not provide shared memory semantics.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 conc

Using ThreadingBackend as joblib.Parallel backend instead of LokyBackend as the latter does not provide shared memory semantics.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 100
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 100
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 100
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 100
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 100
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 100
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 100
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 100
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 100
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 100
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 100
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 100
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 100
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 100
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 100
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 100
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
building tree 10 of 100
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
building tree 11 of 100
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:

[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:    0.0s remaining:    0.0s
building tree 23 of 110
[Parallel(n_jobs=1)]: Done  23 out of  23 | elapsed:    0.0s remaining:    0.0s
building tree 24 of 110
[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    0.0s remaining:    0.0s
building tree 25 of 110
[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.0s remaining:    0.0s
building tree 26 of 110
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.0s remaining:    0.0s
building tree 27 of 110
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.0s remaining:    0.0s
building tree 28 of 110
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.0s remaining:    0.0s
building tree 29 of 110
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.0s remaining:    0.0s
building tree 30 of 110
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.0s remaining:    0.0s
building tree 31 of 110
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    0.0s rem

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    0.0s remaining:    0.0s
building tree 49 of 110
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed:    0.0s remaining:    0.0s
building tree 50 of 110
building tree 51 of 110
building tree 52 of 110
building tree 53 of 110
building tree 54 of 110
building tree 55 of 110
building tree 56 of 110
building tree 57 of 110
building tree 58 of 110
building tree 59 of 110
building tree 60 of 110
building tree 61 of 110
building tree 62 of 110
building tree 63 of 110
building tree 64 of 110
building tree 65 of 110
building tree 66 of 110
building tree 67 of 110
building tree 68 of 110
building tree 69 of 110
building tree 70 of 110
building tree 71 of 110
building tree 72 of 110
building tree 73 of 110
building tree 74 of 110
building tree 75 of 110
building tree 76 of 110
building tree 77 of 110
building tree 78 of 110
building tree 79 of 110
building tree 80 of 110
building tree 81 of 110
building tree 82 of 110
building tree 83 of 110


[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    0.0s remaining:    0.0s
building tree 26 of 110
[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:    0.0s remaining:    0.0s
building tree 27 of 110
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    0.0s remaining:    0.0s
building tree 28 of 110
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    0.0s remaining:    0.0s
building tree 29 of 110
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    0.0s remaining:    0.0s
building tree 30 of 110
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.0s remaining:    0.0s
building tree 31 of 110
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    0.0s remaining:    0.0s
building tree 32 of 110
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    0.0s remaining:    0.0s
building tree 33 of 110
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:    0.0s remaining:    0.0s
building tree 34 of 110
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    0.0s rem

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 90
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 90
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 90
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 90
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 90
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 90
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 90
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 90
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
building tree 10 of 90
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
b

[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    0.0s remaining:    0.0s
building tree 38 of 90
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.0s remaining:    0.0s
building tree 39 of 90
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    0.0s remaining:    0.0s
building tree 40 of 90
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.0s remaining:    0.0s
building tree 41 of 90
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    0.0s remaining:    0.0s
building tree 42 of 90
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s remaining:    0.0s
building tree 43 of 90
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:    0.0s remaining:    0.0s
building tree 44 of 90
[Parallel(n_jobs=1)]: Done  44 out of  44 | elapsed:    0.0s remaining:    0.0s
building tree 45 of 90
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s remaining:    0.0s
building tree 46 of 90
[Parallel(n_jobs=1)]: Done  46 out of  46 | elapsed:    0.0s remaining:  

[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
building tree 10 of 90
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
building tree 11 of 90
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
building tree 12 of 90
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.0s remaining:    0.0s
building tree 13 of 90
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.0s remaining:    0.0s
building tree 14 of 90
[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:    0.0s remaining:    0.0s
building tree 15 of 90
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    0.0s remaining:    0.0s
building tree 16 of 90
[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    0.0s remaining:    0.0s
building tree 17 of 90
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    0.0s remaining:    0.0s
building tree 18 of 90
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    0.0s remaining:  

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 100
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 100
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 100
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 100
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 100
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 100
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 100
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 100
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 100
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

building tree 99 of 100
building tree 100 of 100
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
Using ThreadingBackend as joblib.Parallel backend instead of LokyBackend as the latter does not provide shared memory semantics.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 

[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  44 out of  44 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  46 out of  46 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  47 out of  47 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 10

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 110
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 110
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 110
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 110
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 110
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 110
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 110
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 110
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 110
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

building tree 91 of 110
building tree 92 of 110
building tree 93 of 110
building tree 94 of 110
building tree 95 of 110
building tree 96 of 110
building tree 97 of 110
building tree 98 of 110
building tree 99 of 110
building tree 100 of 110
building tree 101 of 110
building tree 102 of 110
building tree 103 of 110
building tree 104 of 110
building tree 105 of 110
building tree 106 of 110
building tree 107 of 110
building tree 108 of 110
building tree 109 of 110
building tree 110 of 110
[Parallel(n_jobs=1)]: Done 110 out of 110 | elapsed:    0.0s finished
Using ThreadingBackend as joblib.Parallel backend instead of LokyBackend as the latter does not provide shared memory semantics.
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remainin

[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    0.0s remaining:    0.0s
building tree 35 of 110
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:    0.0s remaining:    0.0s
building tree 36 of 110
[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    0.0s remaining:    0.0s
building tree 37 of 110
[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    0.0s remaining:    0.0s
building tree 38 of 110
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    0.0s remaining:    0.0s
building tree 39 of 110
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    0.0s remaining:    0.0s
building tree 40 of 110
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.0s remaining:    0.0s
building tree 41 of 110
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    0.0s remaining:    0.0s
building tree 42 of 110
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    0.0s remaining:    0.0s
building tree 43 of 110
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:    0.0s rem

RandomizedSearchCV(cv=3,
                   estimator=RandomForestClassifier(class_weight='balanced',
                                                    verbose=50),
                   param_distributions={'max_depth': [20, 40, 60],
                                        'n_estimators': [90, 100, 110]},
                   return_train_score=True, scoring='accuracy')

In [22]:
#Getting the best hyperparams
print(hyperParam_Clf2.best_params_)

{'n_estimators': 110, 'max_depth': 60}


In [23]:
rfModel = RandomForestClassifier(n_estimators = 110, max_depth= 40, class_weight='balanced', verbose = 50).fit(x_train, y_train) 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 110
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 110
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 110
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 110
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 110
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 110
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 110
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 110
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 110
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

In [24]:
joblib.dump(rfModel, 'pickle/rfpredictModel.pkl')

['pickle/rfpredictModel.pkl']

In [25]:
rf_predictions = rfModel.predict(x_test)  

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
#Finding MAE
rf_MAE = mean_absolute_error(y_test, rf_predictions)
print('MAE value is: ', rf_MAE)

#Finding MAPE
rf_MAPE = mean_absolute_percentage_error(y_test, rf_predictions)
print('MAPE value is: ', rf_MAPE)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  

In [26]:
#Hyper param tuning for decision tree too
min_samples_split = [30,40,50,60]
max_features = ['sqrt', 'log2']

dt_model = DecisionTreeClassifier()
parameters = {'min_samples_split': min_samples_split, 'max_features':max_features }

hyperParam_Clf3 = RandomizedSearchCV(dt_model, parameters, cv = 3, scoring = 'accuracy',return_train_score= True)
hyperParam_Clf3.fit(x_cv, y_cv)



RandomizedSearchCV(cv=3, estimator=DecisionTreeClassifier(),
                   param_distributions={'max_features': ['sqrt', 'log2'],
                                        'min_samples_split': [30, 40, 50, 60]},
                   return_train_score=True, scoring='accuracy')

In [27]:
print(hyperParam_Clf3.best_params_)

{'min_samples_split': 30, 'max_features': 'sqrt'}


In [28]:
dtModel = DecisionTreeClassifier(min_samples_split = 40, max_features = 'sqrt').fit(x_train, y_train) 

In [29]:
#Saving decision tree model to drive

joblib.dump(dtModel, 'pickle/dtpredictModel.pkl')

['pickle/dtpredictModel.pkl']

In [30]:
dt_predictions = dtModel.predict(x_test)  

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
#Finding MAE
dt_MAE = mean_absolute_error(y_test, dt_predictions)
print('MAE value is: ', dt_MAE)

#Finding MAPE
dt_MAPE = mean_absolute_percentage_error(y_test, dt_predictions)
print('MAPE value is: ', dt_MAPE)

MAE value is:  1.3333333333333333
MAPE value is:  28.740079365079364


In [31]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

In [32]:
x_D1, x_D2, y_D1, y_D2 = train_test_split(x_train, y_train, test_size=0.50)

In [33]:
x_train.shape

(56, 11)

In [34]:
x_test.shape

(14, 11)

In [35]:
x_D1.shape

(28, 11)

In [36]:
x_D2.shape

(28, 11)

In [37]:
x_test = x_test.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [38]:
x_train = x_train.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [39]:
x_D1 = x_D1.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [40]:
x_D2 = x_D2.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)

In [41]:
x_test = x_test.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
for i in range(3):
  id_val = np.random.choice(np.arange(len(x_D1)), 50000, replace=True)
  sample_x = x_D1.iloc[id_val]
  sample_y = y_D1[id_val]
  #Perforing decision tree on sample 1
  if(i == 0):
    dtSampleModel = DecisionTreeClassifier(min_samples_split = 40, max_features = 'sqrt').fit(sample_x, sample_y) 
    joblib.dump(dtSampleModel, 'pickle/SampleModel_'+ str(i) + '.pkl')
  elif(i == 1):
    knnSampleModel = KNeighborsClassifier(n_neighbors = 30, leaf_size = 40).fit(sample_x, sample_y)
    joblib.dump(knnSampleModel, 'pickle/SampleModel_'+ str(i) + '.pkl')
  else:
    rfSampleModel = RandomForestClassifier(n_estimators = 110, max_depth= 40, class_weight='balanced', verbose = 50).fit(sample_x, sample_y)
    joblib.dump(rfSampleModel, 'pickle/SampleModel_'+ str(i) + '.pkl')

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 110
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
building tree 2 of 110
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
building tree 3 of 110
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
building tree 4 of 110
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
building tree 5 of 110
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
building tree 6 of 110
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
building tree 7 of 110
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
building tree 8 of 110
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
building tree 9 of 110
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining: 

In [42]:
D2_df = pd.DataFrame() 

In [43]:
#Pass D2 through each of the model and get the predictions for it
for i in range(3):
  dt2SampleModel = joblib.load('pickle/SampleModel_'+ str(i) + '.pkl')
  predictedValues = dt2SampleModel.predict(x_D2)
  columnName = 'predict' + str(i)
  D2_df[columnName] = predictedValues

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  

In [44]:
D2_df.head()

Unnamed: 0,predict0,predict1,predict2
0,4,3,4
1,2,3,2
2,3,4,3
3,2,2,2
4,3,3,3


In [45]:
#Deciding the final value from model using majority count.
finalPrediction = []
for i in range(len(D2_df)):
  row_list = D2_df.iloc[i].values.tolist()
  majority_count = max(set(row_list) , key=row_list.count)
  finalPrediction.append(majority_count)

In [46]:
finalPrediction = np.array(finalPrediction)

In [47]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [48]:
#Checking error values for train data now
#Finding MAE
train_MAE = mean_absolute_error(y_D2, finalPrediction)
print('Train MAE value is: ', train_MAE)

#Finding MAPE
train_MAPE = mean_absolute_percentage_error(y_D2, finalPrediction)
print('MAPE value is: ', train_MAPE)

Train MAE value is:  0.5714285714285714
MAPE value is:  19.17091836734694


In [49]:
#Putting in test data now
def testDataPrediction(x_test):
  test_df = pd.DataFrame() 
  for i in range(3):
    SampleModel = joblib.load('pickle/SampleModel_'+ str(i) + '.pkl')
    predictedValues = SampleModel.predict(x_test)
    columnName = 'predict' + str(i)
    test_df[columnName] = predictedValues

  test_finalPrediction = []
  for j in range(len(test_df)):
    row_list = test_df.iloc[j].values.tolist()
    majority_count = max(set(row_list) , key=row_list.count)
    test_finalPrediction.append(majority_count)

  test_finalPrediction = np.array(test_finalPrediction)
  return(test_finalPrediction)

In [50]:
test_MAE = mean_absolute_error(y_test, testDataPrediction(x_test))
print('Test MAE value is: ', test_MAE)

#Finding MAPE
test_MAPE = mean_absolute_percentage_error(y_test, testDataPrediction(x_test))
print('Test MAPE value is: ', test_MAPE)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  12 out of  