In [67]:
import numpy as np
import warnings
import pickle
import os
from urllib.request import urlopen
import random as rand
%matplotlib inline
import matplotlib.pyplot as plt
import time

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, multilabel_confusion_matrix, classification_report
from sklearn.metrics import label_ranking_average_precision_score, coverage_error, label_ranking_loss

from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, SGDClassifier

from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.model_selection import cross_val_predict, cross_val_score, ShuffleSplit, StratifiedShuffleSplit

from sklearn import svm as svm, neighbors as nbrs, tree as tree, gaussian_process as GPC, naive_bayes as NB

from sklearn.experimental import enable_hist_gradient_boosting

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier, VotingClassifier

from sklearn.neural_network import MLPClassifier

dir = os.getcwd()
#dir = 'C:/Users/10/Desktop/proper/'
os.chdir(dir)

print(dir)

url_main = 'file:///C:/Users/10/Desktop/proper/'
data_file_name = "UNIQUE_STATES_40k.pkl"
label_file_name = "UNIQUE_LABELS_40k.pkl"

#DATA
data = np.load(urlopen(url_main + data_file_name))
data = np.unpackbits(data).reshape(-1, 2500)
data = data.astype('int')
#data[np.where(data==0)] = -1

#LABELS
labels = pickle.load(urlopen(url_main + label_file_name))
labels = np.unpackbits(labels).reshape(40000, -1)

X = data
Y = labels

print('X shape:', X.shape)
print('Y shape:', Y.shape)

#X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, test_size = 0.2)

#print('X_train shape:', X_train.shape)
#print('Y_train shape:', Y_train.shape)

#sorted(sklearn.metrics.SCORERS.keys()) UNCOMMENT TO PRINT ALL SCORERS

C:\Users\10\Desktop\proper
X shape: (40000, 2500)
Y shape: (40000, 1)


In [60]:
clf1 = LogisticRegression(solver='lbfgs', multi_class='auto', random_state=42, verbose=1,
                          max_iter=1000, n_jobs = -1)

clf2 = svm.LinearSVC(penalty='l1', loss='squared_hinge', dual=False, tol=0.0001, C=1.0,
                    fit_intercept=True, intercept_scaling=1, class_weight=None, 
                    verbose=1, random_state=42, max_iter=1000)

clf3 = NB.MultinomialNB(alpha=1.5, fit_prior=True)

clf4 = tree.DecisionTreeClassifier(criterion='entropy', splitter='best', random_state=42,
                                  min_samples_split=50, min_samples_leaf=1)

clf5 = BaggingClassifier(RandomForestClassifier(n_estimators=400),
                         max_samples=0.5, max_features=0.5, n_jobs=-1,random_state=42)

clf6 = RandomForestClassifier(oob_score=False, n_estimators=400, n_jobs=-1,random_state=42)

clf7 = ExtraTreesClassifier(oob_score=False, n_estimators=400, n_jobs=-1, bootstrap=True,
                            random_state=42)

clf8 = GradientBoostingClassifier(random_state=42)

clf9 = HistGradientBoostingClassifier(random_state=42)

clf10 = MLPClassifier(solver='lbfgs', hidden_layer_sizes = (5, 1000), alpha = 1e-08,
                      warm_start=True, verbose=1, random_state=42)



def benchmark(clf, CVV_n):
    
    print("\n", clf, "\n\n For cross-validation\n")
    
    t1 = time.time()

    CVV = cross_validate(clf, X, Y.ravel(), cv=CVV_n, return_train_score=False, n_jobs=-1,
                       scoring = ['accuracy',
                                  'precision',
                                  'recall',
                                  'f1'])

    t2 = time.time()

    for rows in CVV:
        print(rows, "{0:.4f}".format(CVV[rows].mean()), "{0:.4f}".format(CVV[rows].std()))
    
    print()
    print("{0:.2f}".format((t2-t1)/CVV_n)+'s per one iteration of cross-validate')
    print()
    
    return CVV



def validate(clf, n, data, labels):
    '''
    n-fold cross-validation with stratified sampling
    '''
    
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    sss = StratifiedShuffleSplit(n_splits=n, test_size=0.2, random_state=42)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))
    
    print('For Stratified')
    print('Accuracy', "{0:.4f}".format(np.mean(accuracy_scores)), "{0:.4f}".format(np.std(accuracy_scores)))
    print('Precision', "{0:.4f}".format(np.mean(precision_scores)), "{0:.4f}".format(np.std(precision_scores)))
    print('Recall', "{0:.4f}".format(np.mean(recall_scores)), "{0:.4f}".format(np.std(recall_scores)))
    print('F1-measure', "{0:.4f}".format(np.mean(f1_scores)), "{0:.4f}".format(np.std(f1_scores))) 

In [61]:
benchmark(clf3, 5) # test benchmark, fastest
validate(clf3, 5, X, Y.ravel())


 MultinomialNB(alpha=1.5, class_prior=None, fit_prior=True) 

 For cross-validation

fit_time 0.9739 0.0413
score_time 0.6171 0.0124
test_accuracy 0.6545 0.1625
test_precision 0.6175 0.1961
test_recall 0.8549 0.0141
test_f1 0.7006 0.1160

0.42s per one iteration of cross-validate

For Stratified
Accuracy 0.7511 0.0018
Precision 0.6702 0.0024
Recall 0.8423 0.0047
F1-measure 0.7465 0.0018


In [4]:
benchmark(clf1, 3)


 LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=42, solver='lbfgs', tol=0.0001, verbose=1,
                   warm_start=False) 

fit_time 70.0470 18.5702
score_time 1.7152 1.2301
test_accuracy 0.8501 0.0979
train_accuracy 0.9824 0.0130
test_precision 0.8116 0.1276
train_precision 0.9776 0.0162
test_recall 0.8768 0.0555
train_recall 0.9821 0.0136
test_f1 0.8410 0.0957
train_f1 0.9798 0.0149

29.45s per one iteration of cross-validate



{'fit_time': array([43.99170589, 80.22472382, 85.92458415]),
 'score_time': array([3.40165377, 1.24158287, 0.50250125]),
 'test_accuracy': array([0.71441428, 0.94172354, 0.89417235]),
 'train_accuracy': array([1.        , 0.96913789, 0.97806277]),
 'test_precision': array([0.63673806, 0.93777235, 0.86040401]),
 'train_precision': array([1.        , 0.96225444, 0.97052542]),
 'test_recall': array([0.79965517, 0.92758621, 0.90327586]),
 'train_recall': array([1.        , 0.96698276, 0.97931034]),
 'test_f1': array([0.70895751, 0.93265147, 0.88131887]),
 'train_f1': array([1.        , 0.9646128 , 0.97489809])}

In [5]:
benchmark(clf2, 3)


 LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l1', random_state=42, tol=0.0001,
          verbose=1) 

fit_time 86.1305 55.7012
score_time 0.4655 0.0363
test_accuracy 0.8536 0.0851
train_accuracy 0.9799 0.0147
test_precision 0.8157 0.1069
train_precision 0.9761 0.0173
test_recall 0.8666 0.0712
train_recall 0.9778 0.0166
test_f1 0.8397 0.0896
train_f1 0.9770 0.0169

51.83s per one iteration of cross-validate



{'fit_time': array([ 16.86352587,  88.27532673, 153.25256991]),
 'score_time': array([0.51678586, 0.4409709 , 0.438658  ]),
 'test_accuracy': array([0.73811309, 0.94052351, 0.88209705]),
 'train_accuracy': array([1.        , 0.96508794, 0.97468782]),
 'test_precision': array([0.67516697, 0.93425846, 0.83769968]),
 'train_precision': array([1.        , 0.95967255, 0.96852217]),
 'test_recall': array([0.76689655, 0.92862069, 0.90413793]),
 'train_recall': array([1.        , 0.96008621, 0.97344828]),
 'test_f1': array([0.7181143 , 0.93143104, 0.86965174]),
 'train_f1': array([1.        , 0.95987934, 0.97097898])}

In [6]:
benchmark(clf3, 3)


 MultinomialNB(alpha=1.5, class_prior=None, fit_prior=True) 

fit_time 0.6174 0.0147
score_time 0.8847 0.0074
test_accuracy 0.6442 0.1939
train_accuracy 0.7319 0.0398
test_precision 0.6356 0.2261
train_precision 0.6443 0.0435
test_recall 0.8264 0.0335
train_recall 0.8701 0.0922
test_f1 0.6941 0.1283
train_f1 0.7375 0.0410

1.39s per one iteration of cross-validate



{'fit_time': array([0.59659052, 0.62785888, 0.62785888]),
 'score_time': array([0.87949562, 0.87943983, 0.89506888]),
 'test_accuracy': array([0.42425379, 0.8958974 , 0.61246531]),
 'train_accuracy': array([0.74739368, 0.67735403, 0.77099036]),
 'test_precision': array([0.4218763 , 0.94837398, 0.53655156]),
 'train_precision': array([0.63278008, 0.59778068, 0.70243974]),
 'test_recall': array([0.8737931 , 0.80448276, 0.80103448]),
 'train_recall': array([0.99913793, 0.78948276, 0.82155172]),
 'test_f1': array([0.56902262, 0.87052239, 0.64264472]),
 'train_f1': array([0.77483621, 0.68038633, 0.75734096])}

In [7]:
benchmark(clf4, 3)


 DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=50,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=42, splitter='best') 

fit_time 11.9281 2.2629
score_time 0.3463 0.0211
test_accuracy 0.8012 0.1369
train_accuracy 0.9844 0.0113
test_precision 0.7365 0.1842
train_precision 0.9778 0.0141
test_recall 0.9467 0.0528
train_recall 0.9866 0.0117
test_f1 0.8185 0.1221
train_f1 0.9822 0.0129

5.34s per one iteration of cross-validate



{'fit_time': array([ 8.97354102, 12.34034657, 14.47031784]),
 'score_time': array([0.3761301 , 0.32925487, 0.333606  ]),
 'test_accuracy': array([0.65419229, 0.9838746 , 0.76546914]),
 'train_accuracy': array([0.99456236, 0.96865039, 0.98995013]),
 'test_precision': array([0.56647657, 0.99241756, 0.65045593]),
 'train_precision': array([0.99125139, 0.95827657, 0.98385995]),
 'test_recall': array([0.87344828, 0.97034483, 0.9962069 ]),
 'train_recall': array([0.9962931 , 0.97017241, 0.99318966]),
 'test_f1': array([0.6872414 , 0.98125708, 0.78703262]),
 'train_f1': array([0.99376585, 0.9641878 , 0.98850279])}

In [8]:
benchmark(clf5, 3)


 BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True,
                                                        class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features='auto',
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        n_estimators=400,
                                                        n_jobs=None,
     

{'fit_time': array([52.95813966, 78.15876698, 76.2726047 ]),
 'score_time': array([16.53210378, 18.53561759, 19.21905327]),
 'test_accuracy': array([0.49932503, 1.        , 0.94644866]),
 'train_accuracy': array([0.99909998, 0.99501256, 1.        ]),
 'test_precision': array([0.4608719 , 1.        , 0.89038993]),
 'train_precision': array([0.99793531, 0.99757008, 1.        ]),
 'test_recall': array([0.88948276, 1.        , 1.        ]),
 'train_recall': array([1.        , 0.99094828, 1.        ]),
 'test_f1': array([0.60715547, 1.        , 0.94201722]),
 'train_f1': array([0.99896659, 0.99424815, 1.        ])}

In [9]:
benchmark(clf6, 2)


 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=400,
                       n_jobs=-1, oob_score=False, random_state=42, verbose=0,
                       warm_start=False) 

fit_time 18.6260 3.3448
score_time 4.1075 1.3324
test_accuracy 0.7350 0.1116
train_accuracy 1.0000 0.0000
test_precision 0.6393 0.0999
train_precision 1.0000 0.0000
test_recall 0.9602 0.0398
train_recall 1.0000 0.0000
test_f1 0.7651 0.0850
train_f1 1.0000 0.0000

13.78s per one iteration of cross-validate



{'fit_time': array([15.28115892, 21.97074533]),
 'score_time': array([5.43988967, 2.77501321]),
 'test_accuracy': array([0.6234 , 0.84655]),
 'train_accuracy': array([1., 1.]),
 'test_precision': array([0.5393319 , 0.73923018]),
 'train_precision': array([1., 1.]),
 'test_recall': array([0.92045977, 1.        ]),
 'train_recall': array([1., 1.]),
 'test_f1': array([0.68014269, 0.85006595]),
 'train_f1': array([1., 1.])}

In [10]:
benchmark(clf7, 3)


 ExtraTreesClassifier(bootstrap=True, class_weight=None, criterion='gini',
                     max_depth=None, max_features='auto', max_leaf_nodes=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=400, n_jobs=-1,
                     oob_score=False, random_state=42, verbose=0,
                     warm_start=False) 

fit_time 51.4008 9.0446
score_time 6.9680 4.3956
test_accuracy 0.8183 0.2131
train_accuracy 1.0000 0.0000
test_precision 0.7811 0.2247
train_precision 1.0000 0.0000
test_recall 0.9635 0.0516
train_recall 1.0000 0.0000
test_f1 0.8494 0.1667
train_f1 1.0000 0.0000

22.34s per one iteration of cross-validate



{'fit_time': array([38.96178913, 55.03927708, 60.20129442]),
 'score_time': array([12.95646453,  5.4181993 ,  2.5294528 ]),
 'test_accuracy': array([0.51927404, 1.        , 0.93572339]),
 'train_accuracy': array([1., 1., 1.]),
 'test_precision': array([0.47212066, 1.        , 0.87126333]),
 'train_precision': array([1., 1., 1.]),
 'test_recall': array([0.89051724, 1.        , 1.        ]),
 'train_recall': array([1., 1., 1.]),
 'test_f1': array([0.61708483, 1.        , 0.93120334]),
 'train_f1': array([1., 1., 1.])}

In [11]:
benchmark(clf8, 3)


 GradientBoostingClassifier(criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='auto',
                           random_state=42, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False) 

fit_time 386.2307 6.8271
score_time 0.6762 0.0259
test_accuracy 0.9007 0.1153
train_accuracy 0.9938 0.0058
test_precision 0.8614 0.1441
train_precision 0.9976 0.0031
test_recall 0.9386 0.0869
train_recall 0.9882 0.0106
test_f1 0.8968 0.1183
train_f1 0.9929 0.0068

132.38s per one iteration of cross-validate



{'fit_time': array([377.97797942, 394.6967957 , 386.01747108]),
 'score_time': array([0.70429134, 0.64179778, 0.68260264]),
 'test_accuracy': array([0.73908805, 0.99977499, 0.96332408]),
 'train_accuracy': array([0.9998125 , 0.98593768, 0.99576255]),
 'test_precision': array([0.66251225, 0.99948303, 0.92224519]),
 'train_precision': array([0.99956915, 0.99323315, 0.99991296]),
 'test_recall': array([0.81568966, 1.        , 1.        ]),
 'train_recall': array([1.        , 0.97431034, 0.99034483]),
 'test_f1': array([0.73116452, 0.99974145, 0.95955   ]),
 'train_f1': array([0.99978453, 0.98368075, 0.99510589])}

In [12]:
benchmark(clf9, 2)


 HistGradientBoostingClassifier(l2_regularization=0.0, learning_rate=0.1,
                               loss='auto', max_bins=256, max_depth=None,
                               max_iter=100, max_leaf_nodes=31,
                               min_samples_leaf=20, n_iter_no_change=None,
                               random_state=42, scoring=None, tol=1e-07,
                               validation_fraction=0.1, verbose=0) 

fit_time 76.9881 3.6801
score_time 2.4197 0.0325
test_accuracy 0.7659 0.1118
train_accuracy 1.0000 0.0000
test_precision 0.6735 0.1089
train_precision 1.0000 0.0000
test_recall 0.9456 0.0502
train_recall 1.0000 0.0000
test_f1 0.7844 0.0919
train_f1 1.0000 0.0000

43.88s per one iteration of cross-validate



{'fit_time': array([73.30798554, 80.66824841]),
 'score_time': array([2.38719559, 2.45217776]),
 'test_accuracy': array([0.6541 , 0.87775]),
 'train_accuracy': array([1., 1.]),
 'test_precision': array([0.56457458, 0.78244378]),
 'train_precision': array([1., 1.]),
 'test_recall': array([0.8954023 , 0.99586207]),
 'train_recall': array([1., 1.]),
 'test_f1': array([0.692506  , 0.87634653]),
 'train_f1': array([1., 1.])}

In [13]:
benchmark(clf10, 3)


 MLPClassifier(activation='relu', alpha=1e-08, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(5, 1000), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='lbfgs', tol=0.0001,
              validation_fraction=0.1, verbose=1, warm_start=True) 

fit_time 181.7868 121.8933
score_time 1.3271 0.2151
test_accuracy 0.7173 0.1957
train_accuracy 0.8378 0.1940
test_precision 0.5050 0.4073
train_precision 0.6443 0.4562
test_recall 0.6529 0.4617
train_recall 0.6503 0.4601
test_f1 0.5560 0.4139
train_f1 0.6472 0.4581

91.78s per one iteration of cross-validate



{'fit_time': array([  9.41558909, 269.72620916, 266.21873784]),
 'score_time': array([1.61945605, 1.10804939, 1.25387359]),
 'test_accuracy': array([0.56442178, 0.99354984, 0.59386485]),
 'train_accuracy': array([0.56442661, 0.99520006, 0.95368808]),
 'test_precision': array([0.        , 0.99738858, 0.51769789]),
 'train_precision': array([0.        , 0.99431231, 0.93856309]),
 'test_recall': array([0.        , 0.98775862, 0.97086207]),
 'train_recall': array([0.        , 0.99465517, 0.95612069]),
 'test_f1': array([0.        , 0.99255024, 0.67530131]),
 'train_f1': array([0.        , 0.99448371, 0.94726054])}

In [27]:


def validate(clf, n, data, labels):
    '''
    n-fold cross-validation with stratified sampling.
    '''
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    sss = StratifiedShuffleSplit(n_splits=n)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))

    print('Accuracy', "{0:.4f}".format(np.mean(accuracy_scores)), "{0:.4f}".format(np.std(accuracy_scores)))
    print('Precision', "{0:.4f}".format(np.mean(precision_scores)), "{0:.4f}".format(np.std(precision_scores)))
    print('Recall', "{0:.4f}".format(np.mean(recall_scores)), "{0:.4f}".format(np.std(recall_scores)))
    print('F1-measure', "{0:.4f}".format(np.mean(f1_scores)), "{0:.4f}".format(np.std(f1_scores))) 



In [29]:
validate(clf3, 5, X, Y.ravel())

Accuracy 0.7520 0.0059
Precision 0.6698 0.0052
Recall 0.8478 0.0110
F1-measure 0.7484 0.0066


In [30]:
validate(clf1, 5, X, Y.ravel())

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   47.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   41.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   45.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   46.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Accuracy 0.9347 0.0013
Precision 0.9513 0.0037
Recall 0.8959 0.0055
F1-measure 0.9227 0.0018


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   45.0s finished


In [31]:
validate(clf2, 5, X, Y.ravel())

[LibLinear][LibLinear][LibLinear][LibLinear][LibLinear]Accuracy 0.9367 0.0038
Precision 0.9504 0.0056
Recall 0.9016 0.0068
F1-measure 0.9253 0.0046


In [32]:
validate(clf3, 5, X, Y.ravel())

Accuracy 0.7554 0.0073
Precision 0.6757 0.0078
Recall 0.8420 0.0130
F1-measure 0.7497 0.0076


In [33]:
validate(clf4, 5, X, Y.ravel())

Accuracy 0.9537 0.0026
Precision 0.9458 0.0023
Recall 0.9479 0.0041
F1-measure 0.9468 0.0031


In [34]:
validate(clf5, 5, X, Y.ravel())

Accuracy 0.9896 0.0015
Precision 0.9944 0.0016
Recall 0.9816 0.0032
F1-measure 0.9880 0.0017


In [35]:
validate(clf6, 5, X, Y.ravel())

Accuracy 0.9889 0.0012
Precision 0.9944 0.0011
Recall 0.9801 0.0030
F1-measure 0.9872 0.0014


In [36]:
validate(clf7, 5, X, Y.ravel())

Accuracy 0.9909 0.0012
Precision 0.9955 0.0006
Recall 0.9837 0.0033
F1-measure 0.9895 0.0014


In [37]:
validate(clf8, 5, X, Y.ravel())

Accuracy 0.9866 0.0014
Precision 0.9936 0.0014
Recall 0.9754 0.0033
F1-measure 0.9844 0.0017


In [38]:
validate(clf9, 5, X, Y.ravel())

Accuracy 0.9937 0.0010
Precision 0.9965 0.0009
Recall 0.9890 0.0016
F1-measure 0.9927 0.0011


In [39]:
validate(clf10, 5, X, Y.ravel())

Accuracy 0.9345 0.0032
Precision 0.8981 0.0052
Recall 0.9580 0.0026
F1-measure 0.9271 0.0034


In [62]:
os.getcwd()

'C:\\Users\\10\\Desktop\\proper'