# Projeto 7: Classificação multiclasse iris com validação cruzada

## Etapa 1: Importação das bibliotecas

In [None]:
!pip install skorch

Collecting skorch
[?25l  Downloading https://files.pythonhosted.org/packages/18/c7/2f6434f9360c91a4bf14ae85f634758e5dacd3539cca4266a60be9f881ae/skorch-0.9.0-py3-none-any.whl (125kB)
[K     |██▋                             | 10kB 27.4MB/s eta 0:00:01[K     |█████▏                          | 20kB 2.9MB/s eta 0:00:01[K     |███████▉                        | 30kB 3.8MB/s eta 0:00:01[K     |██████████▍                     | 40kB 4.1MB/s eta 0:00:01[K     |█████████████                   | 51kB 3.3MB/s eta 0:00:01[K     |███████████████▋                | 61kB 3.6MB/s eta 0:00:01[K     |██████████████████▎             | 71kB 4.1MB/s eta 0:00:01[K     |████████████████████▉           | 81kB 4.4MB/s eta 0:00:01[K     |███████████████████████▍        | 92kB 4.6MB/s eta 0:00:01[K     |██████████████████████████      | 102kB 4.5MB/s eta 0:00:01[K     |████████████████████████████▋   | 112kB 4.5MB/s eta 0:00:01[K     |███████████████████████████████▎| 122kB 4.5MB/s eta 0:00

In [None]:
import pandas as pd
import numpy as np
import torch.nn as nn        
from skorch import NeuralNetClassifier
import torch
from sklearn.model_selection import cross_val_score
# Show which PyTorch version this notebook is running against.
torch.__version__

'1.6.0+cu101'

## Etapa 2: Base de dados

In [None]:
# One named seed shared by NumPy and PyTorch so the run is repeatable and the
# value is defined in a single place instead of being repeated as a literal.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f60659ba120>

In [None]:
# Read the iris table; columns 0-3 become the predictors and column 4 the target.
base = pd.read_csv('iris.csv')
previsores, classe = base.iloc[:, :4].values, base.iloc[:, 4].values

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map each distinct class label to an integer code; the fitted encoder is kept
# in `encoder` so it stays available after this cell.
encoder = LabelEncoder()
encoder.fit(classe)
classe = encoder.transform(classe)

In [None]:
# List the distinct encoded class values.
np.unique(classe)

array([0, 1, 2])

In [None]:
# Cast features to float32 and labels to int64 before handing them to the network.
previsores = previsores.astype(np.float32)
classe = classe.astype(np.int64)

## Etapa 3: Construção do modelo

In [None]:
class classificador_torch(nn.Module):
    """Feed-forward classifier: 4 inputs -> 16 -> 16 -> 3 outputs, with ReLU
    after each of the first two linear layers.

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        # Input layer: 4 features -> 16 hidden units.
        self.dense0 = nn.Linear(in_features=4, out_features=16)
        self.activation0 = nn.ReLU()
        # Hidden layer: 16 -> 16 units.
        self.dense1 = nn.Linear(in_features=16, out_features=16)
        self.activation1 = nn.ReLU()
        # Output layer: 16 hidden units -> 3 outputs (one per class).
        self.dense2 = nn.Linear(in_features=16, out_features=3)

    def forward(self, X):
        """Run X through the three linear layers and return the final outputs."""
        hidden = self.activation0(self.dense0(X))
        hidden = self.activation1(self.dense1(hidden))
        return self.dense2(hidden)

In [None]:
# Wrap the torch module in skorch's scikit-learn compatible classifier so it can
# be passed to cross_val_score.
classificador_sklearn = NeuralNetClassifier(module = classificador_torch, 
                                            criterion = torch.nn.CrossEntropyLoss, 
                                            optimizer = torch.optim.Adam,
                                            max_epochs = 1000,
                                            batch_size = 10,
                                            # No internal validation split: every sample
                                            # given to fit() is used for training.
                                            train_split = False,
                                            # Silence the per-epoch log table; with 1000 epochs
                                            # per fold it floods the cell output.
                                            verbose = 0)

## Etapa 4: Validação cruzada

In [None]:
# 5-fold cross-validation of the skorch classifier; yields one accuracy per fold.
resultados = cross_val_score(
    estimator=classificador_sklearn,
    X=previsores,
    y=classe,
    scoring='accuracy',
    cv=5,
)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
     10        [36m0.4109[0m  0.0187
     11        0.5542  0.0185
     12        [36m0.4098[0m  0.0190
     13        0.5332  0.0178
     14        0.4993  0.0185
     15        [36m0.3979[0m  0.0181
     16        [36m0.3369[0m  0.0186
     17        0.3687  0.0185
     18        0.3944  0.0199
     19        [36m0.2903[0m  0.0188
     20        0.3097  0.0174
     21        [36m0.2867[0m  0.0202
     22        [36m0.2556[0m  0.0174
     23        0.2571  0.0232
     24        [36m0.2474[0m  0.0162
     25        [36m0.2309[0m  0.0174
     26        [36m0.2160[0m  0.0173
     27        [36m0.2134[0m  0.0160
     28        [36m0.2090[0m  0.0183
     29        [36m0.2031[0m  0.0170
     30        [36m0.1998[0m  0.0194
     31        [36m0.1923[0m  0.0217
     32        [36m0.1882[0m  0.0201
     33        [36m0.1818[0m  0.0218
     34        0.1826  0.0194
     35        [36m0.17

In [None]:
# Mean and standard deviation of the per-fold accuracies.
media, desvio = resultados.mean(), resultados.std()

In [None]:
# Display the mean accuracy and its standard deviation across the folds.
media, desvio

(0.9800000000000001, 0.02666666666666666)

In [None]:
# Display the individual accuracy obtained on each cross-validation fold.
resultados

array([1.        , 1.        , 0.96666667, 0.93333333, 1.        ])

# Tuning

In [None]:
# Tuning

import pandas as pd
import torch.nn as nn
from skorch import NeuralNetClassifier
import torch
import torch.nn.functional as F
from sklearn.model_selection import GridSearchCV
# NOTE: this is not the last expression of the cell, so its value is not displayed.
torch.__version__

class classificador_torch(nn.Module):
    """Configurable 4 -> neurons -> neurons -> 3 classifier.

    Parameters
    ----------
    activation : callable
        Applied to the output of each of the first two linear layers.
    neurons : int
        Width of both hidden layers.
    initializer : callable
        Called on the weight tensors of the first two linear layers right
        after they are created; the output layer is not passed through it.
    dropout : float
        Probability handed to nn.Dropout. A single dropout layer is reused
        after both hidden activations.
    """

    def __init__(self, activation, neurons, initializer, dropout):
        super().__init__()

        # Layers are created and initialised in this exact order so that the
        # random number generator is consumed the same way on every build.
        self.dense0 = nn.Linear(in_features=4, out_features=neurons)
        initializer(self.dense0.weight)
        self.activation0 = activation

        self.dense1 = nn.Linear(in_features=neurons, out_features=neurons)
        initializer(self.dense1.weight)
        self.activation1 = activation

        self.dense2 = nn.Linear(in_features=neurons, out_features=3)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, X):
        """Apply linear -> activation -> dropout twice, then the output layer."""
        hidden = self.dropout(self.activation0(self.dense0(X)))
        hidden = self.dropout(self.activation1(self.dense1(hidden)))
        return self.dense2(hidden)

In [None]:
# Re-seed torch and rebuild the predictors and encoded labels from the CSV
# for the tuning section.
torch.manual_seed(123)

from sklearn.preprocessing import LabelEncoder

base = pd.read_csv('iris.csv')

# Columns 0-3 are the features, cast to float32.
previsores = base.iloc[:, :4].values.astype('float32')

# Column 4 is the class label: encode it to integer codes, then cast to int64.
labelencoder = LabelEncoder()
classe = labelencoder.fit_transform(base.iloc[:, 4].values).astype('int64')

In [None]:
# skorch wrapper used as the grid-search estimator; the remaining settings
# (optimizer, epochs, batch size, module__* arguments) are supplied by the grid.
clf_sklearn_tunning = NeuralNetClassifier(module = classificador_torch,
                                            criterion = torch.nn.CrossEntropyLoss,
                                            # No internal validation split: every sample
                                            # given to fit() is used for training.
                                            train_split = False,
                                            # Silence the per-epoch log table; the grid search
                                            # trains for thousands of epochs per candidate and
                                            # would otherwise flood the cell output.
                                            verbose = 0)


In [None]:
# Hyper-parameter grid for the search. Keys prefixed with `module__` are
# forwarded by skorch to the module's constructor; the others configure the
# NeuralNetClassifier itself.
params = {'batch_size': [10, 30],
          'max_epochs': [2000, 3000],
          'optimizer': [torch.optim.Adam, torch.optim.SGD],
          # torch.tanh / torch.sigmoid are used instead of F.tanh / F.sigmoid,
          # which are deprecated aliases that compute the same values.
          'module__activation': [F.relu, torch.tanh, torch.sigmoid],
          'module__neurons': [4, 8, 16],
          'module__initializer': [torch.nn.init.uniform_, torch.nn.init.normal_],
          'module__dropout': [0.2, 0.3]
          }

In [None]:
# Exhaustive search over `params`, scoring each candidate by accuracy with
# 3-fold cross-validation.
grid_search = GridSearchCV(
    clf_sklearn_tunning,
    params,
    scoring='accuracy',
    cv=3,
)

In [None]:
# Run the search: 288 parameter combinations x 3 folds, each trained for
# 2000 or 3000 epochs, so this cell is very slow.
grid_search = grid_search.fit(X=previsores, y=classe)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
    857        0.3642  0.0120
    858        0.3842  0.0118
    859        0.5559  0.0121
    860        0.3132  0.0122
    861        0.4438  0.0120
    862        0.4826  0.0123
    863        0.3522  0.0122
    864        0.3897  0.0121
    865        0.5592  0.0147
    866        0.4574  0.0122
    867        0.4054  0.0122
    868        0.3037  0.0123
    869        0.4130  0.0122
    870        0.3824  0.0120
    871        0.4794  0.0121
    872        0.4737  0.0120
    873        0.5082  0.0149
    874        0.5164  0.0123
    875        0.3919  0.0121
    876        0.4881  0.0121
    877        0.4939  0.0122
    878        0.5588  0.0123
    879        0.4371  0.0135
    880        0.4765  0.0124
    881        0.3806  0.0123
    882        0.4994  0.0122
    883        0.4942  0.0120
    884        0.4416  0.0122
    885        0.4400  0.0122
    886        0.4157  0.0119
    887        0.5134  0.01

In [None]:
# Best hyper-parameter combination found and its mean cross-validated accuracy.
melhores_parametros, melhor_precisao = grid_search.best_params_, grid_search.best_score_