In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
bc = datasets.load_breast_cancer()

In [3]:
df = pd.DataFrame(bc['data'], columns=bc['feature_names'])
df['target'] = bc['target']
df['target_name'] = pd.Categorical.from_codes(bc['target'], bc['target_names'])
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target,target_name
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0,malignant
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0,malignant
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0,malignant
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0,malignant
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0,malignant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0,malignant
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0,malignant
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0,malignant
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0,malignant


### Modelando

In [4]:
X = df.drop(columns=['target','target_name'])
Y = df[['target']]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state = 7)

**Conferindo os datasets**

In [6]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(455, 30)
(114, 30)
(455, 1)
(114, 1)


**Construindo o modelo**

In [7]:
from sklearn.neural_network import MLPClassifier

In [8]:
clf = MLPClassifier(hidden_layer_sizes=(100), max_iter=1000, solver='lbfgs') 
#MultiLayer Perceptron 
##Configurações básicas nos hiperparãmetros: hidden_layer_sizes - Quantidade de Camadas intermediárias (As vírgulas da tupla define a quantidade de subcamadas). max_iter - N° de repetições

#Outras opções: activation{‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default=’relu’ | solver{‘lbfgs’, ‘sgd’, ‘adam’}, default=’adam’ (lbfgs é bom para datasets pequenos)

**Ajustando o modelo**

In [9]:
#Normalizar (Opcional)

from sklearn.preprocessing import StandardScaler
std = StandardScaler()

std.fit(X_train)
X_train = std.transform(X_train)
X_test = std.transform(X_test)

In [10]:
clf.fit(X_train, y_train)

  return f(*args, **kwargs)


MLPClassifier(hidden_layer_sizes=100, max_iter=1000, solver='lbfgs')

**Predição**

In [11]:
y_pred = clf.predict(X_test)
y_pred

array([1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1])

**Verificando performance**

In [12]:
print(f'Acurácia: {(accuracy_score(y_test, y_pred)*100):.2f}%') #A resposta vem calculada de 0-1, então multiplicamos por 100 para visualização intuitiva

Acurácia: 99.12%


**Usando gradiente descendente padrão (solver='adam')**

*Com parâmetros padrões (hidden_layer_sizes=(100) - Uma Camada - e max_iter=1000), a acurácia foi de 91.23%**

*Com duas subcamadas iguais (50,50), a acurácia foi de 92.98%*

*Com três, 90.35%*

*Com 3 camadas descendentes (100, 50, 10), 90.35%*

*****98.25%***** *(Normalizado)*

**Usando um método otimizado para datasets pequenos (solver='lbfgs')**

*Acurácia: 96.49%*

*****99.12%***** *(Normalizado)*