## Gas Sensor Array Drift - Modelagem

In [47]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report

import matplotlib.pyplot as plt

In [9]:
seed = 10

### Leitura dos Dados

In [5]:
dataset = pd.read_csv('../Dataset/dataset.csv')
dataset_pca = pd.read_csv('../Dataset/dataset_pca.csv')

In [7]:
print(dataset.shape)
dataset.head()

(13910, 129)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,target
0,-0.905872,-0.997343,-0.971612,-0.975257,-0.984879,0.958387,0.79549,0.865034,-0.878662,-0.998106,...,0.941052,-0.784112,-0.854664,-0.96745,-0.995176,-0.986399,0.964157,0.763942,0.947373,1.0
1,-0.874435,-0.996351,-0.935227,-0.942549,-0.984585,0.92819,0.775693,0.862033,-0.845751,-0.997632,...,0.945275,-0.76241,-0.840881,-0.93932,-0.989122,-0.981219,0.941361,0.751117,0.951699,1.0
2,-0.828754,-0.994975,-0.901866,-0.90726,-0.976629,0.883323,0.746385,0.856738,-0.792863,-0.996824,...,0.941742,-0.729302,-0.791153,-0.912999,-0.983409,-0.97006,0.911878,0.730715,0.948836,1.0
3,-0.826652,-0.99498,-0.854993,-0.85644,-0.919666,0.880565,0.744694,0.857085,-0.789712,-0.996811,...,0.940969,-0.724868,-0.786506,-0.877101,-0.976055,-0.959714,0.905411,0.726073,0.948164,1.0
4,-0.782066,-0.99387,-0.86288,-0.861308,-0.964463,0.831314,0.713546,0.852045,-0.737248,-0.996058,...,0.936418,-0.686562,-0.711911,-0.876364,-0.974309,-0.956094,0.87244,0.702171,0.940519,1.0


## Separação dos dados

In [21]:
X = dataset.drop('target', axis=1)
y = dataset.loc[:,'target']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

## Classificação

In [86]:
kfold = StratifiedKFold(n_splits=10, random_state=seed)

### k-NN

### Sem PCA 

In [24]:
print("Início dos classificadores de individuais (KNN, DT, RANDOMFOREST, MLP)\n")
# Criação dos classificadores
# KNN
clf_knn = KNeighborsClassifier(n_neighbors=5)
clf_knn = clf_knn.fit(X_train, y_train)
pred_knn = clf_knn.predict(X_test)

scores = cross_val_score(clf_knn, X, y, cv=kfold)

print('Score:', clf_knn.score(X_test, y_test))
# Imprimindo relatório de classificação do modelo inicial
print("Relatorio de Classificação do modelo inicial (kNN)")
print(classification_report(y_test, pred_knn), "\n")

Início dos classificadores de individuais (KNN, DT, RANDOMFOREST, MLP)

Score: 0.99252443933295
Relatorio de Classificação do modelo inicial (kNN)
              precision    recall  f1-score   support

         1.0       0.99      0.99      0.99       653
         2.0       0.99      0.99      0.99       733
         3.0       1.00      0.99      0.99       401
         4.0       0.99      0.99      0.99       504
         5.0       0.99      1.00      0.99       741
         6.0       1.00      0.99      1.00       446

   micro avg       0.99      0.99      0.99      3478
   macro avg       0.99      0.99      0.99      3478
weighted avg       0.99      0.99      0.99      3478
 



In [97]:
results = []
for k in range(1, 11):
    for metric in ['euclidean', 'manhattan', 'chebyshev']:
        params = {'n_neighbors': k, 'metric': metric}
        
        clf_knn = KNeighborsClassifier(**params)
        
        scores = cross_val_score(clf_knn, X_train, y_train, cv=kfold)
        scores_mean = scores.mean()
        scores_std = scores.std()
        
        results.append((params, scores_mean, scores_std))
        
        print('Acuracia - KNN: k = %2d, metric = %s, scores: %0.3f' % (k, metric, scores_mean))

Acuracia - KNN: k =  1, metric = euclidean, scores: 0.995
Acuracia - KNN: k =  1, metric = manhattan, scores: 0.996
Acuracia - KNN: k =  1, metric = chebyshev, scores: 0.992
Acuracia - KNN: k =  2, metric = euclidean, scores: 0.994
Acuracia - KNN: k =  2, metric = manhattan, scores: 0.995
Acuracia - KNN: k =  2, metric = chebyshev, scores: 0.989
Acuracia - KNN: k =  3, metric = euclidean, scores: 0.993
Acuracia - KNN: k =  3, metric = manhattan, scores: 0.995
Acuracia - KNN: k =  3, metric = chebyshev, scores: 0.988
Acuracia - KNN: k =  4, metric = euclidean, scores: 0.992
Acuracia - KNN: k =  4, metric = manhattan, scores: 0.994
Acuracia - KNN: k =  4, metric = chebyshev, scores: 0.987
Acuracia - KNN: k =  5, metric = euclidean, scores: 0.991
Acuracia - KNN: k =  5, metric = manhattan, scores: 0.993
Acuracia - KNN: k =  5, metric = chebyshev, scores: 0.986
Acuracia - KNN: k =  6, metric = euclidean, scores: 0.990
Acuracia - KNN: k =  6, metric = manhattan, scores: 0.992
Acuracia - KNN

In [98]:
results = np.array(results)
idx_best_result = results[:, 1].argmax()
results[idx_best_result]

array([{'n_neighbors': 1, 'metric': 'manhattan'}, 0.9960687459155222,
       0.001389698920687762], dtype=object)

### Com PCA

In [99]:
X = dataset_pca.drop('target', axis=1)
y = dataset_pca.loc[:,'target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=seed)

In [100]:
results = []
for k in range(1, 11):
    for metric in ['euclidean', 'manhattan', 'chebyshev']:
        params = {'n_neighbors': k, 'metric': metric}
        
        clf_knn = KNeighborsClassifier(**params)
        
        scores = cross_val_score(clf_knn, X_train, y_train, cv=kfold)
        scores_mean = scores.mean()
        scores_std = scores.std()
        
        results.append((params, scores_mean, scores_std))
        
        print('Acuracia - KNN: k = %2d, metric = %s, scores: %0.3f' % (k, metric, scores_mean))

Acuracia - KNN: k =  1, metric = euclidean, scores: 0.995
Acuracia - KNN: k =  1, metric = manhattan, scores: 0.996
Acuracia - KNN: k =  1, metric = chebyshev, scores: 0.993
Acuracia - KNN: k =  2, metric = euclidean, scores: 0.994
Acuracia - KNN: k =  2, metric = manhattan, scores: 0.995
Acuracia - KNN: k =  2, metric = chebyshev, scores: 0.992
Acuracia - KNN: k =  3, metric = euclidean, scores: 0.993
Acuracia - KNN: k =  3, metric = manhattan, scores: 0.995
Acuracia - KNN: k =  3, metric = chebyshev, scores: 0.991
Acuracia - KNN: k =  4, metric = euclidean, scores: 0.992
Acuracia - KNN: k =  4, metric = manhattan, scores: 0.994
Acuracia - KNN: k =  4, metric = chebyshev, scores: 0.989
Acuracia - KNN: k =  5, metric = euclidean, scores: 0.991
Acuracia - KNN: k =  5, metric = manhattan, scores: 0.993
Acuracia - KNN: k =  5, metric = chebyshev, scores: 0.987
Acuracia - KNN: k =  6, metric = euclidean, scores: 0.990
Acuracia - KNN: k =  6, metric = manhattan, scores: 0.993
Acuracia - KNN

In [101]:
results = np.array(results)
idx_best_result = results[:, 1].argmax()
results[idx_best_result]

array([{'n_neighbors': 1, 'metric': 'manhattan'}, 0.9958774482577667,
       0.002190101118254926], dtype=object)