In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
%matplotlib inline

#importing models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

In [5]:
xtrain = np.load(r"C:\\Users\\hp\xtrain.npy")
xtest  =  np.load(r"C:\\Users\\hp\xtest.npy")
ytrain = np.load(r"C:\\Users\\hp\ytrain.npy")
ytest = np.load(r"C:\\Users\\hp\ytest.npy")

<b>LINEAR REGRESSION</B>

In [6]:
# Create a Linear regressor
lm = LinearRegression()
# Train the model using the training sets 
lm.fit(xtrain, ytrain)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [7]:
# Predicting Test data with the model
y_test_pred = lm.predict(xtest)
acc_linreg = metrics.r2_score(ytest, y_test_pred)
print('R^2:', acc_linreg)

R^2: 0.2443185424018477


<b>RANDOM FOREST REGRESSOR</B>

In [233]:
# Create a Random Forest Regressor
RFR = RandomForestRegressor(max_depth=3,n_estimators=10,min_weight_fraction_leaf=0.0,min_samples_split=2,max_leaf_nodes=5)
# Train the model using the training sets 
RFR.fit(xtrain, ytrain)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=3,
                      max_features='auto', max_leaf_nodes=5,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=10,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [234]:
# Predicting Test data with the model
y_test_pred = RFR.predict(xtest)
# Model Evaluation
acc_rf = metrics.r2_score(ytest, y_test_pred)
print('R^2:', acc_rf)

R^2: 0.38355388380880706


<b>XGBOOST REGRESSOR</b>

In [235]:
#Create a XGBoost Regressor
xgb = XGBRegressor(learning_rate=0.1,max_depth=6,min_child_weight=1,n_estimators=200,subsample=1)
# Train the model using the training sets 
xgb.fit(xtrain, ytrain)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=6, min_child_weight=1, missing=None, n_estimators=200,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

In [236]:
#Predicting Test data with the model
y_test_pred = xgb.predict(xtest)
acc_xgb = metrics.r2_score(ytest, y_test_pred)
print('R^2:', acc_xgb)

R^2: 0.4434973253500747


In [237]:
from sklearn.neural_network import MLPRegressor
nn = MLPRegressor(activation='relu',solver='lbfgs',hidden_layer_sizes=(300,200),random_state=1,max_iter=25000)

In [238]:
nn.fit(xtrain,ytrain)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(300, 200), learning_rate='constant',
             learning_rate_init=0.001, max_iter=25000, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
             validation_fraction=0.1, verbose=False, warm_start=False)

In [239]:
#Predicting Test data with the model
y_test_pred = nn.predict(xtest)
acc_nn = metrics.r2_score(ytest, y_test_pred)
print('R^2:', acc_nn)

R^2: 0.3291310741405381


<B>STACKING</B>

In [241]:
#Implementing Stacking
import warnings
from mlxtend.regressor import StackingRegressor
from sklearn.ensemble import AdaBoostRegressor
AdaBoostRegressor()

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

In [242]:
rg1 = XGBRegressor(learning_rate=0.1,max_depth=6,min_child_weight=1,n_estimators=200,subsample=1)
rg2 = MLPRegressor(activation='relu',solver='lbfgs',hidden_layer_sizes=(300,200),random_state=1,max_iter=25000)
rg3 = AdaBoostRegressor(learning_rate=0.01,n_estimators=300,random_state=1)
meta_rg1 = LinearRegression()
srg1 = StackingRegressor(regressors=[rg1,rg2,rg3],meta_regressor=meta_rg1)

In [243]:
srg1.fit(xtrain,ytrain)



StackingRegressor(meta_regressor=LinearRegression(copy_X=True,
                                                  fit_intercept=True,
                                                  n_jobs=None,
                                                  normalize=False),
                  refit=True,
                  regressors=[XGBRegressor(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=1,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0,
                                           importance_type='gain',
                                           learning_rate=0.1, max_delta_step=0,
                                           max_depth=6, min_child_weight=1,
                                           missing=None, n_es...
                                           max_iter=25000, momentum=0.9,
                                           n_iter_no_change=10,
     

In [244]:
#Predicting Test data with the model
y_test_pred = srg1.predict(xtest)
acc_srg1 = metrics.r2_score(ytest, y_test_pred)
print('R^2:', acc_srg1)

R^2: 0.46554361490149976


In [245]:
models = pd.DataFrame({
    'Model': ['Linear Regression', 'Random Forest', 'XGBoost','Artificial Neural Network','Stacking'],
    'R-squared Score': [acc_linreg*100, acc_rf*100, acc_xgb*100,acc_nn*100,acc_srg1*100]})
models.sort_values(by='R-squared Score', ascending=False)

Unnamed: 0,Model,R-squared Score
4,Stacking,46.554361
2,XGBoost,44.349733
1,Random Forest,38.355388
3,Artificial Neural Network,32.913107
0,Linear Regression,24.431854


# CONVERTING IT INTO A CLASSIFICATION PROBLEM

In [302]:
#import scoring and report metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

#import models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [303]:
ytrain[ytrain<=-0.5]=0
ytrain[(ytrain>-0.5) & (ytrain<=1) & (ytrain!=0)]=1
ytrain[ytrain>1]=2
ytest[ytest<=-0.5]=0
ytest[(ytest>-0.5) & (ytest<=1) & (ytest!=0)]=1
ytest[ytest>1]=2

<b>Making Predicitions</b>

<b>KNN</b>

In [306]:
knn = KNeighborsClassifier()
knn.fit(xtrain, ytrain)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [307]:
predictions = knn.predict(xtest)
score_knn = accuracy_score(ytest, predictions)
print(score_knn)

0.5151515151515151


In [308]:
#confusion metris and classification report
conf_mat = confusion_matrix(ytest, predictions)
print("Confusion Matrix\n", conf_mat)
class_rep = classification_report(ytest, predictions)
print("Classification Report\n", class_rep)

Confusion Matrix
 [[ 2  5  4]
 [ 4 15 11]
 [ 0  8 17]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.33      0.18      0.24        11
         1.0       0.54      0.50      0.52        30
         2.0       0.53      0.68      0.60        25

    accuracy                           0.52        66
   macro avg       0.47      0.45      0.45        66
weighted avg       0.50      0.52      0.50        66



<b>SUPPORT VECTOR MACHINE</b>

In [310]:
svm = SVC(gamma='auto')
svm.fit(xtrain, ytrain)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [311]:
predictions2 = svm.predict(xtest)
score_svm = accuracy_score(ytest, predictions2)
print(score_svm)

0.3787878787878788


In [312]:
# print confusion matrix and classification report
conf_mat2 = confusion_matrix(ytest, predictions2)
print("Confusion Matrix\n", conf_mat)
class_rep2 = classification_report(ytest, predictions2)
print("Classification Report\n", class_rep2)

Confusion Matrix
 [[ 2  5  4]
 [ 4 15 11]
 [ 0  8 17]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        11
         1.0       0.00      0.00      0.00        30
         2.0       0.38      1.00      0.55        25

    accuracy                           0.38        66
   macro avg       0.13      0.33      0.18        66
weighted avg       0.14      0.38      0.21        66



  'precision', 'predicted', average, warn_for)


<b>LOGISTIC REGRESSION</b>

In [349]:
LG = LogisticRegression()
LG.fit(xtrain,ytrain)
predictions3 = LG.predict(xtest)
LG.score(xtest,ytest)



0.48484848484848486

In [315]:
#confusion metris and classification report
conf_mat3 = confusion_matrix(ytest, predictions3)
print("Confusion Matrix\n", conf_mat3)
class_rep3 = classification_report(ytest, predictions3)
print("Classification Report\n", class_rep3)

Confusion Matrix
 [[ 2  7  2]
 [ 2 17 11]
 [ 0 12 13]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.50      0.18      0.27        11
         1.0       0.47      0.57      0.52        30
         2.0       0.50      0.52      0.51        25

    accuracy                           0.48        66
   macro avg       0.49      0.42      0.43        66
weighted avg       0.49      0.48      0.47        66



<b>RANDOM FOREST</b>

In [317]:
RF = RandomForestClassifier(n_jobs=4,random_state=0,n_estimators=8,max_depth=4,max_leaf_nodes=8)
#training the classifier
RF.fit(xtrain,ytrain)
predictions4 = RF.predict(xtest)
RandomForestClassifier()

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators='warn',
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [318]:
RF.score(xtest,ytest)

0.5151515151515151

In [319]:
#confusion metris and classification report
conf_mat4 = confusion_matrix(ytest, predictions4)
print("Confusion Matrix\n", conf_mat4)
class_rep4 = classification_report(ytest, predictions4)
print("Classification Report\n", class_rep4)

Confusion Matrix
 [[ 1  8  2]
 [ 0 20 10]
 [ 0 12 13]]
Classification Report
               precision    recall  f1-score   support

         0.0       1.00      0.09      0.17        11
         1.0       0.50      0.67      0.57        30
         2.0       0.52      0.52      0.52        25

    accuracy                           0.52        66
   macro avg       0.67      0.43      0.42        66
weighted avg       0.59      0.52      0.48        66



<b>XGBOOST CLASSIFIER</b>

In [321]:
xgb = XGBClassifier(max_depth=7,subsample=0.5,n_estimators=200,learnign_rate=0.01,min_child_weight=2,reg_alpha=0,reg_lambda=1)
xgb.fit(xtrain,ytrain)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learnign_rate=0.01, learning_rate=0.1, max_delta_step=0,
              max_depth=7, min_child_weight=2, missing=None, n_estimators=200,
              n_jobs=1, nthread=None, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=None, silent=None, subsample=0.5, verbosity=1)

In [323]:
predictions6 = xgb.predict(xtest)
xgb.score(xtest,ytest)

0.5303030303030303

In [324]:
#confusion metris and classification report
conf_mat6 = confusion_matrix(ytest, predictions6)
print("Confusion Matrix\n", conf_mat6)
class_rep6 = classification_report(ytest, predictions6)
print("Classification Report\n", class_rep6)

Confusion Matrix
 [[ 2  7  2]
 [ 2 20  8]
 [ 0 12 13]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.50      0.18      0.27        11
         1.0       0.51      0.67      0.58        30
         2.0       0.57      0.52      0.54        25

    accuracy                           0.53        66
   macro avg       0.53      0.46      0.46        66
weighted avg       0.53      0.53      0.51        66



<b>NEURAL NETWORKS</b>

In [325]:
from sklearn.neural_network import MLPClassifier
MLPClassifier()

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [326]:
nn = MLPClassifier(activation='relu',solver='lbfgs',hidden_layer_sizes=(20),random_state=1,max_iter=15)
nn.fit(xtrain,ytrain)
predictions7 = nn.predict(xtest)
nn.score(xtest,ytest)

0.5

In [327]:
#confusion metris and classification report
conf_mat7 = confusion_matrix(ytest, predictions7)
print("Confusion Matrix\n", conf_mat7)
class_rep7 = classification_report(ytest, predictions7)
print("Classification Report\n", class_rep7)

Confusion Matrix
 [[ 2  7  2]
 [ 3 18  9]
 [ 0 12 13]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.40      0.18      0.25        11
         1.0       0.49      0.60      0.54        30
         2.0       0.54      0.52      0.53        25

    accuracy                           0.50        66
   macro avg       0.48      0.43      0.44        66
weighted avg       0.49      0.50      0.49        66



<b>ADABOOST CLASSIFIER</b>

In [329]:
from sklearn.ensemble import AdaBoostClassifier
AdaBoostClassifier(n_estimators=1000,random_state=1,learning_rate=0.1)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.1,
                   n_estimators=1000, random_state=1)

In [330]:
adb = AdaBoostClassifier()
adb.fit(xtrain,ytrain)
predictions8 = nn.predict(xtest)
adb.score(xtest,ytest)

0.4090909090909091

In [331]:
#confusion metris and classification report
conf_mat8 = confusion_matrix(ytest, predictions8)
print("Confusion Matrix\n", conf_mat8)
class_rep8 = classification_report(ytest, predictions8)
print("Classification Report\n", class_rep8)

Confusion Matrix
 [[ 2  7  2]
 [ 3 18  9]
 [ 0 12 13]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.40      0.18      0.25        11
         1.0       0.49      0.60      0.54        30
         2.0       0.54      0.52      0.53        25

    accuracy                           0.50        66
   macro avg       0.48      0.43      0.44        66
weighted avg       0.49      0.50      0.49        66



<b>STACKING</B>

In [343]:
#Implementing Stacking
import warnings
from mlxtend.classifier import StackingClassifier

In [344]:
clf1 = MLPClassifier(activation='relu',solver='lbfgs',hidden_layer_sizes=(8),random_state=1,max_iter=10)
clf2 = XGBClassifier(max_depth=4,subsample=0.5,n_estimators=100,learnign_rate=0.01,min_child_weight=2,reg_alpha=0,reg_lambda=1)
clf3 = RandomForestClassifier(n_jobs=4,random_state=0,n_estimators=8,max_depth=4,max_leaf_nodes=8)
meta_c1 = LogisticRegression()
sclf1 = StackingClassifier(classifiers=[clf1,clf2,clf3],use_probas=True,meta_classifier=meta_c1)

In [345]:
sclf1.fit(xtrain,ytrain)
predictions9 =sclf1.predict(xtest)



In [346]:
sclf1.score(xtest,ytest)

0.5454545454545454

In [347]:
#confusion metris and classification report
conf_mat9 = confusion_matrix(ytest, predictions9)
print("Confusion Matrix\n", conf_mat9)
class_rep9 = classification_report(ytest, predictions9)
print("Classification Report\n", class_rep9)

Confusion Matrix
 [[ 2  7  2]
 [ 2 19  9]
 [ 0 10 15]]
Classification Report
               precision    recall  f1-score   support

         0.0       0.50      0.18      0.27        11
         1.0       0.53      0.63      0.58        30
         2.0       0.58      0.60      0.59        25

    accuracy                           0.55        66
   macro avg       0.53      0.47      0.48        66
weighted avg       0.54      0.55      0.53        66

