In [None]:
%matplotlib inline

In [None]:
import warnings
# Suppress every warning category for the remainder of the kernel session.
# NOTE(review): this also hides library deprecation and solver-convergence
# warnings raised by the code below; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sn
import numpy as np
import pandas as pd
import os
import sklearn
import glob
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import pickle
from pycm import ConfusionMatrix

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
DATASET     = 'GAUSSIAN'             # dataset tag used to build paths
TEST_SPLIT  = 16000                  # row index separating train rows from test rows
N_TRIALS    = 10                     # repetitions per (model, signal) pair
WEIGHT_DIR  = './weights/' + DATASET

# Human-readable names of the early-warning signals, in the order they are
# indexed along the last axis of the feature matrix by the experiment loop.
ews_signals = [
    'Auto-Regressive Coefficient',
    'Standard Deviation',
    'Skewness',
    'Kurtosis',
    'Coefficient of Variation',
    'Return Rate',
    'Density Ratio',
    'Autocorrelation at First Lag',
]

# Result containers (four independent, initially empty dicts).
# NOTE(review): `confusion_matrix` shadows the function of the same name
# imported from sklearn.metrics at the top of the notebook.
save_data, accuracies, confusion_matrix, perf_metrics = {}, {}, {}, {}

In [None]:
def summarize_perf(y_true,y_pred):
    """Build a pycm confusion-matrix report, print it, and return it.

    Parameters
    ----------
    y_true : sequence
        Ground-truth labels, forwarded as ``actual_vector``.
    y_pred : sequence
        Predicted labels, forwarded as ``predict_vector``.

    Returns
    -------
    ConfusionMatrix
        The pycm object constructed from the two label vectors.
    """
    report = ConfusionMatrix(predict_vector=y_pred, actual_vector=y_true)
    print(report)
    return report

In [None]:
def get_model(name='LogisticRegression'):
    """Create an unfitted classifier of the requested kind.

    Each call draws a fresh integer seed in ``[0, 1000)`` from NumPy's global
    RNG and passes it as ``random_state`` to the estimator, so repeated calls
    yield differently-seeded models.

    Parameters
    ----------
    name : str
        One of ``"LogisticRegression"``, ``"SVM"``, ``"RandomForest"`` or
        ``"MLP"``.

    Returns
    -------
    estimator
        A new, unfitted scikit-learn classifier.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported model names.
    """
    supported = ("LogisticRegression", "SVM", "RandomForest", "MLP")
    # Fail fast with a clear message; previously an unknown name fell through
    # every branch and crashed with UnboundLocalError on `return model`.
    if name not in supported:
        raise ValueError(
            "Unknown model name {!r}; expected one of {}".format(name, list(supported))
        )

    state = np.random.randint(0,1000)
    if(name=="LogisticRegression"):
        # NOTE: n_jobs=48 is hard-coded for the original machine.
        model = LogisticRegression(C=0.5,solver='sag',verbose=1,n_jobs=48,multi_class='auto',random_state=state)
    elif(name=="SVM"):
        model = SVC(C=0.5,kernel='linear',random_state=state)
    elif(name=="RandomForest"):
        model = RandomForestClassifier(n_jobs=48,random_state=state)
    else:  # name == "MLP" (guaranteed by the validation above)
        model = MLPClassifier(random_state=state)
    return model

In [None]:
def unison_shuffle(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    A single permutation is drawn from NumPy's global RNG and used to index
    both inputs, so element ``i`` of ``a`` stays paired with element ``i``
    of ``b`` after shuffling.

    Parameters
    ----------
    a, b : array-like supporting integer-array indexing
        Must have the same length (checked with ``assert``).

    Returns
    -------
    tuple
        ``(a_shuffled, b_shuffled)``.
    """
    n_items = len(a)
    assert n_items == len(b)
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [None]:
# ---------------------------------------------------------------------------
# Main experiment: for every dataset, every classifier and every individual
# early-warning signal, train N_TRIALS freshly seeded models and report the
# test accuracy, a pycm summary and a normalised confusion matrix.
# ---------------------------------------------------------------------------
models      = ["LogisticRegression","SVM","RandomForest","MLP"]
DATASETS    = ['GAUSSIAN','NOISE']
TEST_SPLIT  = 16000   # rows [:TEST_SPLIT] are used for training, the rest for testing
N_TRIALS    = 10

for DATASET in DATASETS:
    WEIGHT_DIR  = './weights/{}'.format(DATASET)
    X_M         = np.load('./data/EWS_MATRIX_{}.npy'.format(DATASET))
    Y           = np.load('./data/EWS_LABEL_{}.npy'.format(DATASET))
    ews_signals = ['Auto-Regressive Coefficient','Standard Deviation','Skewness','Kurtosis','Coefficient of Variation','Return Rate','Density Ratio','Autocorrelation at First Lag']
    save_data = {}
    accuracies       = {}
    # NOTE(review): this name shadows sklearn.metrics.confusion_matrix imported
    # at the top of the notebook; kept unchanged in case later cells read it.
    confusion_matrix = {}
    perf_metrics     = {}
    for model_name in models:
        print('===='*20,"\n\n"," \t \t \t MODEL : ",model_name,"\n\n",'===='*20)
        # The last axis of X_M indexes the individual early-warning signals.
        for ews_index in range(X_M.shape[2]):
            print('===='*20,"\n"," \t \t \t EWS SIGNAL : ",ews_signals[ews_index],"\n",'===='*20)
            ews_name               = ews_signals[ews_index]
            # Reset per signal, so after the signal loop these dicts hold the
            # results of the current model only.
            accuracies[ews_name]   = []
            perf_metrics[ews_name] = []
            X_INDIVIDUAL = X_M[:,:,ews_index]
            for trial_no in range(N_TRIALS):
                print('____'*20,"\n"," \t \t \t TRIAL : ",trial_no+1,"\n",'____'*20)
                if('GAUSSIAN' in DATASET):
                    # Random 80/20 split, redrawn on every trial.
                    x_train, x_test, y_train, y_test = train_test_split(X_INDIVIDUAL,Y,test_size=0.2)
                else:
                    # Fixed positional split; only the row order changes per trial.
                    x_train = X_INDIVIDUAL[:TEST_SPLIT]
                    x_test  = X_INDIVIDUAL[TEST_SPLIT:]
                    y_train = Y[:TEST_SPLIT]
                    y_test  = Y[TEST_SPLIT:]
                    x_train,y_train = unison_shuffle(x_train,y_train)
                    x_test,y_test   = unison_shuffle(x_test,y_test)
                # Fit the standardisation on the training split only and apply
                # the same transform to the test split. Scaling each split with
                # its own mean/std puts them in inconsistent feature spaces and
                # leaks test-set statistics into the evaluation.
                scaler  = sklearn.preprocessing.StandardScaler().fit(x_train)
                x_train = scaler.transform(x_train)
                x_test  = scaler.transform(x_test)
                model = get_model(model_name)
                model.fit(x_train, y_train)
                perf_metric = summarize_perf(y_test, model.predict(x_test))
                score       = model.score(x_test, y_test)
                print("---"*20,"\n \t \t TEST ACCURACY ",": \t", score,"\n","---"*20)
                accuracies[ews_name].append(score)
                perf_metrics[ews_name].append(perf_metric)
                fig,ax = plt.subplots(1,1,figsize=(5,5))
                disp = plot_confusion_matrix(model, x_test, y_test,cmap=plt.cm.Blues,normalize='true',display_labels=[-1,0,1],ax=ax)
                plt.show()
                # One figure is created per trial; release it explicitly so
                # hundreds of figures do not accumulate in memory.
                plt.close(fig)
        # Per-model summary: mean test accuracy of each signal over all trials.
        plt.figure(figsize=(8,8))
        sns.barplot(x =[x for x in accuracies.keys()], y =[np.mean(y) for y in accuracies.values()])
        plt.ylabel('Test Accuracy')
        plt.title('{} / {}'.format(model_name, DATASET))
        plt.show()
        plt.close()
