## Bibliotecas

In [16]:
import pandas as pd
from math import ceil
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score

## Dataset

In [2]:
dataset = pd.read_csv('WheatSeedDataset.csv', sep='\t')
# types = ['Kama', 'Rosa', 'Canadian']
# for i in range(len(dataset)):
#     dataset['Type'].loc[i] = types[dataset['Type'].loc[i]-1]
dataset.head()

Unnamed: 0,Area,Perimeter,Compactness,Length of Kernel,Width of Kernel,Asymmetry Coefficient,Length of Kernel Groove,Type
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


## Regra da Pirâmide Geométrica

        Nh = α·√(Ni·No) ; Nh = Número de Neurônios Ocultos
                          Ni = Número de Neurônios de Entrada
                          No = Número de Neurônios de Saída
                          α  = [0.5, 2, 3]

In [3]:
def piramide_geometrica(ni, no, alfa):
    nh = alfa*((ni*no)**(1/2))
    return ceil(nh)

##  Distribuição dos Neurônios em duas Camadas Ocultas

Função para gerar todas as possíveis 2-tuplas que representam o número de neurônios distribuídos por duas camadas ocultas de uma RNA do tipo MLP, dado o número de neurônios ocultos obtidos previamente pela Regra da Pirâmide Geométrica.

In [4]:
def hidden_layers(layers, nh):
    for i in range(1, nh):
        neurons_layers = (i, nh-i)
        layers.append(neurons_layers)
    return layers

### Criação de Lista de Camadas Ocultas a Partir da Regra da Pirâmide Geométrica

In [5]:
num_in = 7
num_out = 3
alpha = [0.5, 2, 3]
layers = []

In [9]:
for i in range(len(alpha)):
    nh = piramide_geometrica(num_in, num_out, alpha[i])
    print('Para α = %.1f, Nh = %d'%(alpha[i],nh))
    hidden_layers(layers, nh)#insere cada possibilidade de camadas ocultas, dado o numero de neurônios, na lista 'layers'
    
print()
print('Distribuições de Camadas Ocultas:\n')
for i in layers:
    print(i)

Para α = 0.5, Nh = 3
Para α = 2.0, Nh = 10
Para α = 3.0, Nh = 14

Distribuições de Camadas Ocultas:

(1, 2)
(2, 1)
(1, 9)
(2, 8)
(3, 7)
(4, 6)
(5, 5)
(6, 4)
(7, 3)
(8, 2)
(9, 1)
(1, 13)
(2, 12)
(3, 11)
(4, 10)
(5, 9)
(6, 8)
(7, 7)
(8, 6)
(9, 5)
(10, 4)
(11, 3)
(12, 2)
(13, 1)


In [14]:
parameters = {'solver': ['lbfgs'], 
              'activation': ['identity', 'logistic', 'tanh', 'relu'],
              'hidden_layer_sizes': layers,
              'max_iter':[1000]}

gs = GridSearchCV(MLPClassifier(), parameters)

In [34]:
#################  TESTES:  #######################

In [17]:
y = dataset['Type']
X = dataset.drop(['Type'], axis=1)

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.30)

In [18]:
gs.fit(X_train,Y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'solver': ['lbfgs'], 'activation': ['identity', 'logistic', 'tanh', 'relu'], 'hidden_layer_sizes': [(1, 2), (2, 1), (1, 9), (2, 8), (3, 7), (4, 6), (5, 5), (6, 4), (7, 3), (8, 2), (9, 1), (1, 13), (2, 12), (3, 11), (4, 10), (5, 9), (6, 8), (7, 7), (8, 6), (9, 5), (10, 4), (11, 3), (12, 2), (13, 1)], 'max_iter': [1000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [36]:
print(gs.score(X_train,Y_train))
print(gs.best_params_)

0.9863945578231292
{'activation': 'identity', 'hidden_layer_sizes': (8, 2), 'max_iter': 1000, 'solver': 'lbfgs'}
