    Atividade
    ● Selecione uma série temporal
    ● Gere as bases de treinamento e teste baseadas em janela de tempo
    ● O alvo deve ser discreto (defina 3 faixas de valores)
    ● Utilizando algum modelo de aprendizagem de máquinas, calcule a acurácia na base de teste

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
%matplotlib inline

In [2]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [3]:
def valor(valor):
    if valor <= 200:
        valor = 'Valor1'
        return valor
    elif valor > 200 and valor <= 400:
        valor = 'Valor2'
        return valor
    else:
        valor = 'Valor3' 
        return valor

In [4]:
dados = pd.read_csv('CSVs/AirPassengers.csv', sep=',', header=0, index_col='Month')

In [5]:
janela = series_to_supervised(data=dados, n_in=3)
y = janela['var1(t)'].apply(valor)
x = janela[['var1(t-3)', 'var1(t-2)', 'var1(t-1)']]

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state = 0)
clf = SVC()

In [7]:
clf.fit(x_train, y_train)
pred = clf.predict(x_test)

In [8]:
print("Acuracia",accuracy_score(y_test,pred))
print("===============================")
print("Matriz Confusão ")
print(confusion_matrix(y_test, pred))
print("===============================")
print("Relatório de Classificação")
print(classification_report(y_test, pred))

Acuracia 0.4186046511627907
Matriz Confusão 
[[ 0 15  0]
 [ 0 18  0]
 [ 0 10  0]]
Relatório de Classificação
             precision    recall  f1-score   support

     Valor1       0.00      0.00      0.00        15
     Valor2       0.42      1.00      0.59        18
     Valor3       0.00      0.00      0.00        10

avg / total       0.18      0.42      0.25        43



In [9]:
parametros_grid = {'C':[0.1, 1, 10, 100, 1000],'gamma':[0.0001, 0.001, 0.01, 0.1, 1], 'kernel':['rbf','linear']}

In [10]:
gs = GridSearchCV(clf, parametros_grid, refit=True, verbose=2, scoring='accuracy')

In [11]:
gs.fit(x_train,y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=0.1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=0.1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=0.1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=0.1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=0.1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=0.1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ..........

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.1s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.1s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] .

[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   3.2s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.7s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=  11.6s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   1.4s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] .

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 10.8min finished


GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'gamma': [0.0001, 0.001, 0.01, 0.1, 1], 'kernel': ['rbf', 'linear']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=2)

In [12]:
print("Melhores parametros",gs.best_params_)
pred = gs.predict(x_test)

Melhores parametros {'C': 0.1, 'gamma': 0.0001, 'kernel': 'linear'}


In [13]:
print("Acuracia",accuracy_score(y_test,pred))
print("===============================")
print("Matriz Confusão ")
print(confusion_matrix(y_test, pred))
print("===============================")
print("Relatório de Classificação")
print(classification_report(y_test, pred))

Acuracia 0.8372093023255814
Matriz Confusão 
[[14  1  0]
 [ 3 13  2]
 [ 0  1  9]]
Relatório de Classificação
             precision    recall  f1-score   support

     Valor1       0.82      0.93      0.87        15
     Valor2       0.87      0.72      0.79        18
     Valor3       0.82      0.90      0.86        10

avg / total       0.84      0.84      0.83        43

