In [1]:
%load_ext autoreload
%autoreload 5

In [1]:
import json
import numpy as np
import pandas as pd
import random

from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    plot_confusion_matrix,
)

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from weight_lifting import WeightLifting
from sfs_features import (
    get_lr_features,
    get_svm_features,
    get_mpl_features
)

from sklearn.ensemble import IsolationForest

In [2]:
np.random.seed(42)
random.seed(42)

### BASE PADRAO

In [3]:
engine = WeightLifting()
df = engine.load_df()
df = engine.transform(df)
X_train, X_test, y_train, y_test = engine.create_train_test(df)
base_padrao_predict = engine.fit_and_predict(X_train, X_test, y_train, y_test)
#engine.plot_results(base_padrao_predict, X_test, y_test)

### FEATURE SELECTION WITH SequentialFeatureSelector 

In [5]:
sfs_predict = []

# LOGISTIC REGRESSION
lr_X_train, lr_X_test, lr_y_train, lr_y_test = engine.create_train_test(df, features=get_mpl_features())
lr_predict = engine.fit_and_predict(lr_X_train, lr_X_test, lr_y_train, lr_y_test)
lr_predict = lr_predict[0:1][0] # Somente o LR interessa
#lr_predict.update({'X_TEST':lr_X_test})
sfs_predict.append(lr_predict)

# SUPORT VECTOR MACHINES
svm_X_train, svm_X_test, svm_y_train, svm_y_test = engine.create_train_test(df, features=get_svm_features())
svm_predict = engine.fit_and_predict(svm_X_train, svm_X_test, svm_y_train, svm_y_test)
svm_predict = svm_predict[1:2][0] # Somente o SVM interessa
#lr_predict.update({'X_TEST':svm_X_test})
sfs_predict.append(svm_predict)

# MULTILAYER PERCEPTRON
mlp_X_train, mlp_X_test, mlp_y_train, mlp_y_test = engine.create_train_test(df, features=get_mpl_features())
mpl_predict = engine.fit_and_predict(mlp_X_train, mlp_X_test, mlp_y_train, mlp_y_test)
mpl_predict = mpl_predict[2:3][0] # Somente o mpl interessa
#lr_predict.update({'X_TEST':mlp_X_test})
sfs_predict.append(mpl_predict)

#engine.plot_results([sfs_predict[0]], lr_X_test, lr_y_test)
#engine.plot_results([sfs_predict[1]], svm_X_test, svm_y_test)
#engine.plot_results([sfs_predict[2]], mlp_X_test, mlp_y_test)

### ISOLATION FOREST

In [6]:
iso = IsolationForest(contamination=0.05)
predict = iso.fit_predict(df.iloc[:, 0:-1])

mask = predict != -1

iso_X_train, iso_X_test, iso_y_train, iso_y_test = engine.create_train_test(df.iloc[mask])

isolation_predict = engine.fit_and_predict(iso_X_train, iso_X_test, iso_y_train, iso_y_test)
#engine.plot_results(isolation_predict, iso_X_test, iso_y_test)
#plot_confusion_matrix(isolation_predict[2]['model'], iso_X_test, iso_y_test, values_format = '.5g')

### ISOLATION FOREST + SFS

In [7]:
iso_sfs_predict = []

# LOGISTIC REGRESSION
iso_lr_X_train, iso_lr_X_test, iso_lr_y_train, iso_lr_y_test = engine.create_train_test(df.iloc[mask], features=get_mpl_features())
iso_lr_predict = engine.fit_and_predict(iso_lr_X_train, iso_lr_X_test, iso_lr_y_train, iso_lr_y_test)
iso_lr_predict = iso_lr_predict[0:1][0] # Somente o LR interessa
#lr_predict.update({'X_TEST':lr_X_test})
iso_sfs_predict.append(iso_lr_predict)

# SUPORT VECTOR MACHINES
iso_svm_X_train, iso_svm_X_test, iso_svm_y_train, iso_svm_y_test = engine.create_train_test(df.iloc[mask], features=get_svm_features())
iso_svm_predict = engine.fit_and_predict(iso_svm_X_train, iso_svm_X_test, iso_svm_y_train, iso_svm_y_test)
iso_svm_predict = iso_svm_predict[1:2][0] # Somente o SVM interessa
#lr_predict.update({'X_TEST':svm_X_test})
iso_sfs_predict.append(iso_svm_predict)

# MULTILAYER PERCEPTRON
iso_mlp_X_train, iso_mlp_X_test, iso_mlp_y_train, iso_mlp_y_test = engine.create_train_test(df.iloc[mask], features=get_mpl_features())
iso_mlp_predict = engine.fit_and_predict(iso_mlp_X_train, iso_mlp_X_test, iso_mlp_y_train, iso_mlp_y_test)
iso_mlp_predict = iso_mlp_predict[2:3][0] # Somente o mpl interessa
#lr_predict.update({'X_TEST':mlp_X_test})
iso_sfs_predict.append(iso_mlp_predict)

#plot_confusion_matrix(iso_sfs_predict[0]['model'], iso_lr_X_test, iso_lr_y_test, values_format = '.5g')
#engine.plot_results([iso_sfs_predict[0]], iso_lr_X_test, iso_lr_y_test)
#engine.plot_results([iso_sfs_predict[1]], iso_svm_X_test, iso_svm_y_test)
#engine.plot_results([iso_sfs_predict[2]], iso_mlp_X_test, iso_mlp_y_test)

In [8]:
# plot_confusion_matrix(iso_sfs_predict[2]['model'], iso_mlp_X_test, iso_mlp_y_test, values_format = '.5g')

In [9]:
# engine.plot_results([iso_sfs_predict[2]], iso_mlp_X_test, iso_mlp_y_test)

### COMPARACAO DE RESULTADOS

In [10]:
# Unificando os resultados em um unico DF
# PADRAO
resultados_inicial = pd.DataFrame(base_padrao_predict)
resultados_inicial['ESTADO'] = 'INICIAL'

# SFS
resultados_sfs = pd.DataFrame(sfs_predict)
resultados_sfs['ESTADO'] = 'SFS'

# ISO
resultados_iso = pd.DataFrame(isolation_predict)
resultados_iso['ESTADO'] = 'ISO'


# ISO + SFS
resultados_iso_sfs = pd.DataFrame(iso_sfs_predict)
resultados_iso_sfs['ESTADO'] = 'ISO_SFS'

resultados = pd.concat([resultados_inicial,resultados_sfs, resultados_iso, resultados_iso_sfs])
resultados['NAME_ESTADO'] = resultados['name'] + '_' + resultados['ESTADO']

In [12]:
resultados

Unnamed: 0,name,model,predict,accuracy,f1,precision,recall,ESTADO,NAME_ESTADO
0,LR,"LogisticRegression(C=0.1, multi_class='ovr', s...","[E, E, A, E, E, A, B, A, D, C, B, A, A, A, B, ...",0.9821,0.9673,0.9633,0.9719,INICIAL,LR_INICIAL
1,SVM,"SVC(C=10, gamma=1e-05, probability=True)","[E, E, A, E, E, A, E, E, E, C, B, A, A, A, B, ...",0.6988,0.621,0.855,0.558,INICIAL,SVM_INICIAL
2,MPL,"MLPClassifier(hidden_layer_sizes=(5, 2), solve...","[A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ...",0.3231,0.0977,0.0646,0.2,INICIAL,MPL_INICIAL
0,LR,"LogisticRegression(C=0.1, multi_class='ovr', s...","[E, E, A, E, E, A, B, A, E, B, A, E, E, E, B, ...",0.7893,0.5585,0.6899,0.5472,SFS,LR_SFS
1,SVM,"SVC(C=10, gamma=1e-05, probability=True)","[E, E, A, E, E, A, E, E, E, C, B, A, A, A, B, ...",0.7356,0.6465,0.7638,0.6015,SFS,SVM_SFS
2,MPL,"MLPClassifier(hidden_layer_sizes=(5, 2), solve...","[E, E, E, E, E, A, E, A, E, E, E, E, E, E, E, ...",0.5577,0.272,0.2871,0.3165,SFS,MPL_SFS
0,LR,"LogisticRegression(C=0.1, multi_class='ovr', s...","[B, B, A, C, E, E, A, D, E, E, A, D, A, B, A, ...",0.9791,0.963,0.9693,0.9572,ISO,LR_ISO
1,SVM,"SVC(C=10, gamma=1e-05, probability=True)","[B, B, A, C, E, E, A, E, E, E, A, E, A, B, A, ...",0.7207,0.6365,0.8716,0.5757,ISO,SVM_ISO
2,MPL,"MLPClassifier(hidden_layer_sizes=(5, 2), solve...","[B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, ...",0.2186,0.0718,0.0437,0.2,ISO,MPL_ISO
0,LR,"LogisticRegression(C=0.1, multi_class='ovr', s...","[B, B, A, B, E, E, A, E, E, E, A, E, A, B, A, ...",0.7845,0.5626,0.7922,0.5525,ISO_SFS,LR_ISO_SFS
