In [27]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

import warnings
warnings.filterwarnings("ignore")

In [28]:
import numpy as np
import glob
import matplotlib.pyplot as plt
import pandas as pd
from scipy import *

def load_csv_rows(paths, n_columns=222):
    """Read comma-separated numeric files and stack their rows into one array.

    Parameters
    ----------
    paths : iterable of str
        CSV files to read, in the order their rows should appear.
    n_columns : int, default 222
        Number of columns every file is expected to have.

    Returns
    -------
    numpy.ndarray
        Float array of shape (total_rows, n_columns). When ``paths`` is empty
        the result has shape (0, n_columns).

    Raises
    ------
    ValueError
        If a file's column count differs from ``n_columns`` (raised by
        ``np.concatenate``) or a file cannot be parsed as numbers.
    """
    # Seed the list with an empty (0, n_columns) block so that the column
    # count is still enforced and an empty `paths` yields a well-shaped array.
    chunks = [np.empty(shape=[0, n_columns])]
    for path in paths:
        print('Loading ', path)
        # ndmin=2 keeps a one-row file as shape (1, n) instead of (n,), which
        # would otherwise fail to stack along axis 0.
        chunks.append(np.loadtxt(path, delimiter=',', ndmin=2))
    # Concatenate once at the end rather than re-copying the accumulated
    # array for every file.
    return np.concatenate(chunks, axis=0)


all_data = glob.glob('./drive/MyDrive/compsci/all_data.csv')
if not all_data:
    # Make the empty-dataset case visible instead of failing later on.
    print('No files matched ./drive/MyDrive/compsci/all_data.csv - data will be empty')

data = load_csv_rows(all_data)

print(data.shape)

Loading  ./drive/MyDrive/compsci/all_data.csv
(151452, 222)


In [29]:
# Split the loaded matrix column-wise: everything except the last two columns
# becomes the feature matrix, and the second-to-last column is the target.
# The final column is not used by this cell.
X, y = data[:, :-2], data[:, -2]

In [30]:
# Candidate classifiers to compare. Every estimator that accepts a seed uses
# random_state=48 so repeated runs build the same models.
gbc_clf = GradientBoostingClassifier(n_estimators=100, random_state=48)
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=48)
rfc_clf = RandomForestClassifier(max_depth=10, random_state=48, n_estimators=10)
NB_clf = GaussianNB()
NNMLP_clf = MLPClassifier(random_state=48, max_iter=100)
# The SVC is wrapped in a pipeline so features are standardized inside each
# training fold before the classifier is fitted.
svc_clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))

# Six stratified folds taken in the data's existing order (no shuffling).
# random_state is deliberately not passed: it only matters when shuffle=True,
# and recent scikit-learn releases raise ValueError if it is combined with
# shuffle=False. Without shuffling the split is already deterministic.
n_folds = StratifiedKFold(n_splits=6, shuffle=False)

In [31]:
# Metrics computed for every fold: maps the short label used in the result
# keys to the scikit-learn scorer name. Insertion order is kept as-is because
# it determines the order of the entries in the printed results.
scoring = dict(
    accuracy='accuracy',
    prec_macro='precision_macro',
    rec_macro='recall_macro',
    f1_macro='f1_macro',
)

In [32]:
def _cv_report(label, estimator):
    """Cross-validate `estimator` on (X, y), print the result dict, return it.

    Uses the shared `n_folds` splitter and `scoring` metric mapping; `label`
    is printed in front of the result exactly as given.
    """
    outcome = cross_validate(estimator, X, y, cv=n_folds, scoring=scoring)
    print(label, outcome)
    return outcome


# Visual divider printed between consecutive model reports.
_divider = '--------------------------------------------------------------------------------------------------------'

# Models are evaluated one after another; each result is bound to its own
# name as soon as it is available so later cells can reuse it.
ada = _cv_report('ada score: ', ada_clf)
print(_divider)
rfc = _cv_report('rfc score: ', rfc_clf)
print(_divider)
NB = _cv_report('NB score: ', NB_clf)
print(_divider)
NNMLP = _cv_report('NNMLP score: ', NNMLP_clf)
print(_divider)
svc = _cv_report('svc score: ', svc_clf)

ada score:  {'fit_time': array([593.67027783, 590.14921999, 586.47153735, 587.08071232,
       586.55754781, 593.02368736]), 'score_time': array([2.25231218, 2.23539925, 2.27471614, 2.23945904, 2.23752856,
       2.24986124]), 'test_accuracy': array([0.74728627, 0.69285318, 0.72910229, 0.83226369, 0.81194042,
       0.82041835]), 'test_prec_macro': array([0.30843925, 0.39413839, 0.35981792, 0.47842174, 0.29709635,
       0.40506096]), 'test_rec_macro': array([0.25176287, 0.29945686, 0.34495462, 0.3346276 , 0.25696864,
       0.29889129]), 'test_f1_macro': array([0.24122488, 0.32882251, 0.34045842, 0.36437765, 0.25527602,
       0.30791171])}
--------------------------------------------------------------------------------------------------------
rfc score:  {'fit_time': array([21.12967205, 21.32598662, 21.17011929, 21.23944426, 21.21314645,
       21.43212938]), 'score_time': array([0.09104347, 0.08750677, 0.08953238, 0.0880754 , 0.08705878,
       0.08782387]), 'test_accuracy': array([

In [33]:
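# Gradient boosting cross-validation is currently disabled; uncomment the two
# lines below to evaluate gbc_clf with the same folds and metrics as the
# other models.
# NOTE(review): presumably skipped because of its runtime - confirm.
# gbc = cross_validate(gbc_clf ,X,y,cv=n_folds,scoring =scoring)
# print('gbc score: ',gbc)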