In [16]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn import svm, linear_model, neural_network, naive_bayes, neighbors, tree, ensemble, linear_model
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score
from sklearn import preprocessing
import time
import os
import glob
from itertools import product

In [17]:
files = os.path.join(os.getcwd(),"csv_files", "*.csv")
print(files)
datasets = glob.glob(files)
datasets

/home/pranav/Project/csv_files/*.csv


['/home/pranav/Project/csv_files/electricity_normalised.csv',
 '/home/pranav/Project/csv_files/pc4.csv',
 '/home/pranav/Project/csv_files/credit.csv',
 '/home/pranav/Project/csv_files/MagicTelescope.csv',
 '/home/pranav/Project/csv_files/irish.csv',
 '/home/pranav/Project/csv_files/pc1.csv',
 '/home/pranav/Project/csv_files/tic-tac-toe.csv',
 '/home/pranav/Project/csv_files/ionosphere.csv',
 '/home/pranav/Project/csv_files/diabetes.csv']

In [18]:
models = [linear_model.LogisticRegression(), neural_network.MLPClassifier(), naive_bayes.GaussianNB(), neighbors.KNeighborsClassifier(algorithm = 'brute'), tree.DecisionTreeClassifier(min_impurity_split = 0.25), ensemble.RandomForestClassifier()]

In [19]:
def read_csv(file_path):
    X = []
    dataframe = pd.read_csv(file_path)
    category = np.logical_not(
        np.logical_or(np.array(dataframe.dtypes == np.float64), np.array(dataframe.dtypes == np.int64)))
    for cat_cols in dataframe.select_dtypes(['object']).columns:
        dataframe[cat_cols] = dataframe[cat_cols].astype('category')
        dataframe[cat_cols] = pd.Categorical.from_array(dataframe[cat_cols]).codes
    for cat_cols in dataframe.select_dtypes(['bool']).columns:
        dataframe[cat_cols] = dataframe[cat_cols].astype('category')
        dataframe[cat_cols] = pd.Categorical.from_array(dataframe[cat_cols]).codes  # Have to see an alternative

    classes = np.array(dataframe[dataframe.columns[-1]])
    a_enc = pd.factorize(classes)
    Y = np.array(a_enc[0])
    for row in dataframe.as_matrix():
        row = np.array(row)
        X.append(np.array(row[0:len(row) - 1]).astype(float))
    X = np.array(X)
    return X, Y

In [20]:
def compute(model, dataset, folds =10 , seed=42):
    X, y = read_csv(dataset)
    kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    accs = np.zeros(folds)   
    f1_scores = np.zeros(folds)
    btime = np.zeros(folds)
    i = 0 
    for index_train, index_test in kf.split(X,y):
        t0=time.time()
        model.fit(X[index_train], y[index_train])
        btime[i] = round(time.time()-t0, 6)
        
        y_true = y[index_test]
        y_pred = model.predict(X[index_test])
        accs[i] = accuracy_score(y_true, y_pred)
        f1_scores[i] = f1_score(y_true, y_pred, average='micro')
        i+=1
    return accs, f1_scores, btime 

In [21]:
shape = (len(datasets), len(models), 10)
accuracies = np.zeros(shape)
f1_scores = np.zeros(shape)
build_time = np.zeros(shape)
for i, d in enumerate(datasets):
    for j, m in enumerate(models):
        print(d)
        print(m)
        accuracies[i,j] = compute(m, d)[0]
        f1_scores[i,j] = compute(m, d)[1]
        build_time[i,j] = compute(m, d)[2]

/home/pranav/Project/csv_files/electricity_normalised.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/electricity_normalised.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/electricity_normalised.csv
GaussianNB(priors=None)




/home/pranav/Project/csv_files/electricity_normalised.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/electricity_normalised.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')




/home/pranav/Project/csv_files/electricity_normalised.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/pc4.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/pc4.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/pc4.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/pc4.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/pc4.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
/home/pranav/Project/csv_files/pc4.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/credit.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/credit.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/credit.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/credit.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/credit.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
/home/pranav/Project/csv_files/credit.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/MagicTelescope.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/MagicTelescope.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/MagicTelescope.csv
GaussianNB(priors=None)




/home/pranav/Project/csv_files/MagicTelescope.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/MagicTelescope.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')




/home/pranav/Project/csv_files/MagicTelescope.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/irish.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/irish.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/irish.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/irish.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
/home/pranav/Project/csv_files/irish.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')




/home/pranav/Project/csv_files/irish.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/pc1.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
/home/pranav/Project/csv_files/pc1.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/pc1.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/pc1.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/pc1.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
/home/pranav/Project/csv_files/pc1.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/tic-tac-toe.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
/home/pranav/Project/csv_files/tic-tac-toe.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/tic-tac-toe.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/tic-tac-toe.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')




/home/pranav/Project/csv_files/tic-tac-toe.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
/home/pranav/Project/csv_files/tic-tac-toe.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/ionosphere.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
/home/pranav/Project/csv_files/ionosphere.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/ionosphere.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/ionosphere.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
/home/pranav/Project/csv_files/ionosphere.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')




/home/pranav/Project/csv_files/ionosphere.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




/home/pranav/Project/csv_files/diabetes.csv
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)




/home/pranav/Project/csv_files/diabetes.csv
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




/home/pranav/Project/csv_files/diabetes.csv
GaussianNB(priors=None)
/home/pranav/Project/csv_files/diabetes.csv
KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
/home/pranav/Project/csv_files/diabetes.csv
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=0.25, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')




/home/pranav/Project/csv_files/diabetes.csv
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)




In [22]:
mean_accs = np.mean(accuracies, axis=2)
mean_accs

array([[ 0.75271506,  0.7806769 ,  0.72974107,  0.80914561,  0.86952689,
         0.89629714],
       [ 0.91014171,  0.76863014,  0.8717572 ,  0.85940954,  0.87792159,
         0.90670761],
       [ 0.721     ,  0.596     ,  0.704     ,  0.656     ,  0.736     ,
         0.73      ],
       [ 0.9732385 ,  0.82298107,  0.89437381,  0.99957931,  0.99989488,
         0.99989488],
       [ 0.75165236,  0.64248739,  0.77203962,  0.67565586,  0.98582683,
         0.94988906],
       [ 0.92968629,  0.86745803,  0.8944931 ,  0.93148824,  0.93058734,
         0.93780288],
       [ 0.69205993,  0.84651734,  0.71809177,  0.83711012,  0.87377634,
         0.91959136],
       [ 0.88326331,  0.92852007,  0.88889823,  0.84862745,  0.89690476,
         0.9257423 ],
       [ 0.77339371,  0.67968216,  0.75647642,  0.71226931,  0.73557758,
         0.74480519]])

In [23]:
mean_f1s = np.mean(f1_scores, axis=2)
mean_f1s

array([[ 0.75271506,  0.77941886,  0.72974107,  0.80914561,  0.86930618,
         0.89609849],
       [ 0.91014171,  0.71554558,  0.8717572 ,  0.85940954,  0.87792159,
         0.90397733],
       [ 0.721     ,  0.651     ,  0.704     ,  0.656     ,  0.735     ,
         0.734     ],
       [ 0.9732385 ,  0.80246255,  0.89437381,  0.99957931,  0.99989488,
         0.99989488],
       [ 0.75165236,  0.62777991,  0.77203962,  0.67565586,  0.98582683,
         0.94196419],
       [ 0.92968629,  0.68698286,  0.8944931 ,  0.93148824,  0.93058734,
         0.93959664],
       [ 0.69205993,  0.83502634,  0.71809177,  0.83711012,  0.8769123 ,
         0.91962358],
       [ 0.88326331,  0.92002334,  0.88889823,  0.84862745,  0.89968254,
         0.94028011],
       [ 0.77339371,  0.65630554,  0.75647642,  0.71226931,  0.72776828,
         0.73173274]])

In [24]:
mean_time = np.mean(build_time, axis=2)
mean_time

array([[  1.69797900e-01,   5.36828940e+00,   1.01191000e-02,
          2.77650000e-03,   1.09524600e-01,   4.32821900e-01],
       [  6.37086000e-02,   5.03353000e-02,   1.12490000e-03,
          5.09300000e-04,   4.10900000e-04,   2.42645000e-02],
       [  1.00921000e-02,   4.46393000e-02,   9.19600000e-04,
          2.82900000e-04,   2.18010000e-03,   1.88252000e-02],
       [  1.99460500e-01,   3.90816100e-01,   4.66970000e-03,
          1.20660000e-03,   1.95462000e-02,   1.48100800e-01],
       [  4.33880000e-03,   3.84219000e-02,   6.78200000e-04,
          2.26900000e-04,   3.75800000e-04,   1.40377000e-02],
       [  3.06644000e-02,   3.12113000e-02,   8.52500000e-04,
          2.84500000e-04,   3.05600000e-04,   2.04541000e-02],
       [  1.41090000e-03,   6.12267000e-01,   7.47700000e-04,
          2.78900000e-04,   8.62700000e-04,   1.68143000e-02],
       [  1.75250000e-03,   2.87714500e-01,   6.51400000e-04,
          2.28200000e-04,   1.60240000e-03,   1.74571000e-02],


In [25]:
#accuracies
columns = [str(m).split('(')[0] for m in models]
columns

['LogisticRegression',
 'MLPClassifier',
 'GaussianNB',
 'KNeighborsClassifier',
 'DecisionTreeClassifier',
 'RandomForestClassifier']

In [26]:
df = DataFrame(data=mean_accs, columns=columns)
df.to_csv('mean_accs')
df

Unnamed: 0,LogisticRegression,MLPClassifier,GaussianNB,KNeighborsClassifier,DecisionTreeClassifier,RandomForestClassifier
0,0.752715,0.780677,0.729741,0.809146,0.869527,0.896297
1,0.910142,0.76863,0.871757,0.85941,0.877922,0.906708
2,0.721,0.596,0.704,0.656,0.736,0.73
3,0.973238,0.822981,0.894374,0.999579,0.999895,0.999895
4,0.751652,0.642487,0.77204,0.675656,0.985827,0.949889
5,0.929686,0.867458,0.894493,0.931488,0.930587,0.937803
6,0.69206,0.846517,0.718092,0.83711,0.873776,0.919591
7,0.883263,0.92852,0.888898,0.848627,0.896905,0.925742
8,0.773394,0.679682,0.756476,0.712269,0.735578,0.744805


In [27]:
df = DataFrame(data=mean_f1s, columns=columns)
df.to_csv('mean_f1s')
df

Unnamed: 0,LogisticRegression,MLPClassifier,GaussianNB,KNeighborsClassifier,DecisionTreeClassifier,RandomForestClassifier
0,0.752715,0.779419,0.729741,0.809146,0.869306,0.896098
1,0.910142,0.715546,0.871757,0.85941,0.877922,0.903977
2,0.721,0.651,0.704,0.656,0.735,0.734
3,0.973238,0.802463,0.894374,0.999579,0.999895,0.999895
4,0.751652,0.62778,0.77204,0.675656,0.985827,0.941964
5,0.929686,0.686983,0.894493,0.931488,0.930587,0.939597
6,0.69206,0.835026,0.718092,0.83711,0.876912,0.919624
7,0.883263,0.920023,0.888898,0.848627,0.899683,0.94028
8,0.773394,0.656306,0.756476,0.712269,0.727768,0.731733


In [28]:
df = DataFrame(data=mean_time, columns=columns)
df.to_csv('mean_time')
df

Unnamed: 0,LogisticRegression,MLPClassifier,GaussianNB,KNeighborsClassifier,DecisionTreeClassifier,RandomForestClassifier
0,0.169798,5.368289,0.010119,0.002777,0.109525,0.432822
1,0.063709,0.050335,0.001125,0.000509,0.000411,0.024265
2,0.010092,0.044639,0.00092,0.000283,0.00218,0.018825
3,0.199461,0.390816,0.00467,0.001207,0.019546,0.148101
4,0.004339,0.038422,0.000678,0.000227,0.000376,0.014038
5,0.030664,0.031211,0.000852,0.000285,0.000306,0.020454
6,0.001411,0.612267,0.000748,0.000279,0.000863,0.016814
7,0.001752,0.287714,0.000651,0.000228,0.001602,0.017457
8,0.005555,0.089135,0.000687,0.000249,0.001526,0.018156
