In [1]:
import numpy as np
import pandas as pd
from lathes_model_multiclass import LathesModel, Lathes_train_test_split

import matplotlib.pyplot as plt

In [2]:
%%time

# Index of the numbered input file to analyse.
input_id = 0
INPUT_PATH = 'Input/Input_%i.csv' % (input_id,)

# The file is read without a header row and only the underlying NumPy array
# is kept.
data = (
    pd.read_csv(INPUT_PATH, header=None)
    .values
)

data.shape

CPU times: user 2.71 s, sys: 168 ms, total: 2.87 s
Wall time: 2.87 s


(2813440, 9)

In [3]:
# Every column except the last is a feature; the last column is the label.
X, y = data[:, :-1], data[:, -1]

X_train, X_test, yy_train, yy_test = Lathes_train_test_split(
    X, y, test_size=0.3, random_state=12
)

# Report the size of each split.
for caption, split in (('X_train shape:', X_train),
                       ('y_train shape:', yy_train),
                       ('X_test shape:', X_test),
                       ('y_test shape:', yy_test)):
    print(caption, split.shape)

X_train shape: (1967616, 8)
y_train shape: (1967616,)
X_test shape: (845824, 8)
y_test shape: (845824,)


In [4]:
# Hyperparameters handed to LathesModel as keyword arguments.
params = dict(
    N_PCs=4,
    granularity=4,
    n_jobs=4,
    selection_type='intersection',
)

model = LathesModel(**params)

In [5]:
# Run the model's private normalization step on the training split. The labels
# are passed in together with the features; how they are used is decided inside
# LathesModel and is not visible from this notebook.
X_norm = model._normalization(X_train,yy_train)

In [6]:
# The feature-extraction call below is disabled; its result is instead loaded
# from a previously saved CSV, using the first column as the index.
# NOTE(review): presumably X_extracted.csv was produced by this same call on
# this same X_norm — confirm the cached file matches the current input file and
# train/test split before trusting anything computed from it.
# X_extracted = model._tsfresh_extraction(X_norm)
X_extracted = pd.read_csv('X_extracted.csv', index_col=[0])

In [7]:
# Feature-selection step on the extracted features. Nothing is assigned here,
# so any result is presumably stored on `model` for the later steps — verify
# against the LathesModel source.
model._tsfresh_selection_3class(X_extracted)


In [8]:
# NOTE(review): in the saved run this call raised
#   TypeError: '>=' not supported between instances of 'dict' and 'int'
# and none of the following cells has an execution count, i.e. the rest of the
# notebook was not executed in that session. The failing comparison is
# presumably inside LathesModel (no traceback is recorded here); it has to be
# fixed there before the cells below can run from a fresh kernel.
model._pca()

TypeError: '>=' not supported between instances of 'dict' and 'int'

In [None]:
model.plot_variation_held(show=True)

In [None]:
%%time 

model._soda()

In [None]:
model._grouping_algorithm()

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Candidate classifiers, each paired with the label used in printed reports and
# figure titles. Keeping label and estimator side by side guarantees the two
# parallel lists derived below can never drift out of sync.
# NOTE(review): the tree, forest, MLP and AdaBoost estimators are created
# without a random_state, so their scores vary from run to run.
_labelled_classifiers = [
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    # SVC uses the RBF kernel unless told otherwise, so the linear kernel must
    # be requested explicitly for this entry to be what its label says.
    # (gamma is kept from the original call; the linear kernel does not use it.)
    ("Linear SVM", SVC(kernel='linear', gamma='scale')),
    ("Radial-basis function kernel SVM", SVC(gamma=2, C=1)),
    ("Radial-basis function kernel Gaussian Process",
     GaussianProcessClassifier(1.0 * RBF(1.0))),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier(n_estimators=100)),
    ("MLP Neural Network", MLPClassifier(alpha=1, max_iter=500)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Gaussian Naive Bayes", GaussianNB()),
    ("Quadratic Discriminant Analysis", QuadraticDiscriminantAnalysis()),
]

# Parallel lists consumed by the training and evaluation cells.
classifiers = [estimator for _, estimator in _labelled_classifiers]

names = [label for label, _ in _labelled_classifiers]

In [None]:
# Train every candidate on the projected training features and their labels.
training_features, training_labels = model.X_projected_, model.target_
for _, estimator in zip(names, classifiers):
    estimator.fit(training_features, training_labels)

In [None]:
from sklearn.metrics import balanced_accuracy_score, plot_confusion_matrix

from datetime import datetime

In [None]:
%%time
# Set the model's n_jobs hyperparameter to 0 before processing the test set.
# What 0 means is defined by LathesModel — presumably "no parallel workers";
# verify against its source.
model.change_hyperparams({'n_jobs': 0})

# `tac` marks the START and `tic` (below) the END of test-set preprocessing.
# The evaluation cell adds (tic - tac) to every classifier's own time.
tac = datetime.now()

# Test-set counterparts of the training steps. Only the normalized data is
# returned; the later steps assign nothing here, and the evaluation cell reads
# the projected test features from model.X_test_projected_.
X_test_norm = model._predict_normalization(X_test)

model._predict_tsfresh_extraction(X_test_norm)

model._predict_pca()

tic = datetime.now()

In [None]:
# Stride used to keep one label per block of rows. The test split printed above
# has 845824 rows = 236 * 3584 — presumably 3584 rows per time series; confirm
# against Lathes_train_test_split.
ROWS_PER_SERIES = 3584
y_test = yy_test[::ROWS_PER_SERIES]

# Maps classifier label -> {'Accuracy': balanced accuracy in percent,
#                           'Time': preprocessing + prediction duration}.
Classifiers_result = {}

for clf_name, clf in zip(names, classifiers):
    # Time the prediction only, so figure rendering and display do not inflate
    # the cost reported for each classifier.
    clf_tac = datetime.now()
    y_pred = clf.predict(model.X_test_projected_)
    clf_tic = datetime.now()

    acc = balanced_accuracy_score(y_test, y_pred) * 100
    Classifiers_result[clf_name] = {
        'Accuracy': acc,
        # Shared test-set preprocessing time (tic - tac, measured in the
        # previous cell) plus this classifier's own prediction time.
        'Time': (tic - tac) + (clf_tic - clf_tac),
    }

    print(clf_name, '-', acc)

    # One titled, row-normalized confusion matrix per classifier, shown and
    # then closed so figures do not accumulate across iterations.
    # NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0
    # and removed in 1.2; on newer versions switch this call (and its import)
    # to ConfusionMatrixDisplay.from_estimator.
    fig = plt.figure(figsize=(10,7))
    ax = fig.subplots(1,1)
    plot_confusion_matrix(clf, model.X_test_projected_, y_test,
                          cmap='GnBu', normalize='true', ax=ax)
    ax.set_title('{} - {:.2f}%'.format(clf_name, acc))
    plt.show()
    plt.close(fig)

In [None]:
model.plot_soda(show=True)

In [None]:
model.plot_GA(show=True)

In [None]:
np.unique(model.classifiers_label_)

In [None]:
np.unique(model.target_)

In [None]:
# Classifiers_result is keyed by classifier name, so the raw frame has one
# column per classifier; transpose to get one row per classifier with the
# 'Accuracy' and 'Time' entries as columns.
results = pd.DataFrame(Classifiers_result).T

results

In [None]:
def plot(PATH=False, figsize=(14, 10), s=50, label_fontsize=20,
         label_pad=18, ticks_fontsize=16, cmap='viridis', show=False):
    """Draw a 3-D scatter of the first three projected training components.

    Reads the notebook-level ``model``: columns 0, 1 and 2 of
    ``model.X_projected_`` become the PC1, PC2 and PC3 axes, and each point is
    coloured by the matching entry of ``model.target_``.

    Parameters
    ----------
    PATH : str or False
        If truthy, the figure is written to this path with a tight bounding box.
    figsize : sequence of two numbers
        Figure size passed to ``plt.figure``.
    s : number
        Marker size for the scatter points.
    label_fontsize : number
        Font size of the three axis labels.
    label_pad : number
        Label padding for the x and y axes; the z axis uses two thirds of it
        (truncated to an integer).
    ticks_fontsize : number
        Font size of the tick labels on all three axes.
    cmap : str
        Colormap used to map ``model.target_`` values to colours.
    show : bool
        If true, display the figure with ``plt.show()``.
    """
    # Importing the toolkit registers the '3d' projection; Matplotlib versions
    # before 3.2 do not register it automatically.
    import mpl_toolkits.mplot3d  # noqa: F401

    projected = model.X_projected_

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(projected[:, 0], projected[:, 1], projected[:, 2],
               c=model.target_, s=s, edgecolor='k', cmap=cmap)

    ax.set_xlabel('PC1', fontsize=label_fontsize, labelpad=label_pad)
    ax.set_ylabel('PC2', fontsize=label_fontsize, labelpad=label_pad)
    ax.set_zlabel('PC3', fontsize=label_fontsize, labelpad=int(2/3*label_pad))
    for axis_name in ('x', 'y', 'z'):
        ax.tick_params(axis=axis_name, labelsize=ticks_fontsize)
    ax.grid()

    # Save before showing so the file is written while the figure is still
    # open, regardless of what the active backend does on show().
    if PATH:
        fig.savefig(PATH, bbox_inches='tight')
    if show:
        plt.show()


In [None]:
plot()

In [None]:
plt.hist(model.target_, bins=3)

In [None]:
# Scratch arithmetic: 386 out of 549. The training split printed above has
# 1967616 rows = 549 * 3584, so 549 is presumably the number of training
# series and 386 the size of one class; the next two cells use 56 and 107, and
# 386 + 56 + 107 = 549.
# TODO(review): derive these shares from model.target_ instead of hard-coding.
386/549

In [None]:
56/549

In [None]:
107/549

In [None]:
model.selection_type_