# Classificação binária - Brest Cancer Simples

Base de dados: https://archive.ics.uci.edu/dataset/17/breast+cancer+winsconsin+diagnostic

## Importando base de dados

In [18]:
import pandas as pd

In [19]:
# previsores 
X = pd.read_csv('2_cancer_entradas.csv')
X

Unnamed: 0,radius1,texture1,perimeter1,area1,smoothness1,compactness1,concavity1,concave_points1,symmetry1,fractal_dimension1,...,radius3,texture3,perimeter3,area3,smoothness3,compactness3,concavity3,concave_points3,symmetry3,fractal_dimension3
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [20]:
# saidas 
y = pd.read_csv('2_cancer_saidas.csv')
y

Unnamed: 0,Diagnosis
0,M
1,M
2,M
3,M
4,M
...,...
564,M
565,M
566,M
567,M


## Divisão base de treino e teste

In [21]:
import sklearn 
from sklearn.model_selection import train_test_split
sklearn.__version__

'1.4.2'

In [22]:
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(X,y,test_size=0.25)

In [23]:
X_treinamento.shape , y_treinamento.shape

((426, 30), (426, 1))

In [24]:
X_teste.shape , y_teste.shape

((143, 30), (143, 1))

## Estrutura da Rede Neural

In [25]:
# Importacao desta lib para desativar erro no TensorFlow
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [26]:
# !pip uninstall -y tensorflow
!pip install -q tensorflow==2.16.1
import tensorflow as tf
from tensorflow.keras.models import Sequential
tf.__version__

'2.16.1'

In [27]:

# shape - numero de colunas do dataframe
# units - para calcular units, depende da quantidade de colunas (30) e de saidas (1) Pois é uma classificação binária
#
#    (30 + 1) / 2 = 15.5 => 16
#

rede_neural = Sequential([
     tf.keras.layers.InputLayer(shape = (30,)),
     tf.keras.layers.Dense(units = 16, activation = 'relu', kernel_initializer = 'random_uniform'),
     tf.keras.layers.Dense(units = 16, activation = 'relu', kernel_initializer = 'random_uniform'),
     tf.keras.layers.Dense(units = 1, activation = 'sigmoid')
])

In [28]:
rede_neural.summary()

## Tratando a base de dados

É necessário converter os dados categóricos nominais em ordinais

In [29]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Converte os rótulos para numérico (0 e 1)
label_encoder = LabelEncoder()
y_treinamento = label_encoder.fit_transform(y_treinamento)
y_teste = label_encoder.fit_transform(y_teste)

# Converte X_treinamento para float32 se ainda não estiver
X_treinamento = np.array(X_treinamento, dtype=np.float32)
y_treinamento = np.array(y_treinamento, dtype=np.float32)
y_teste = np.array(y_teste, dtype=np.float32)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


## Configuração e execução da rede neural

In [30]:
otimizador = tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=0.5)

In [31]:
rede_neural.compile(optimizer = otimizador, loss = 'binary_crossentropy', metrics = ['binary_accuracy'])

In [32]:
rede_neural.fit(X_treinamento, y_treinamento, batch_size = 10, epochs = 100)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - binary_accuracy: 0.5651 - loss: 0.6628
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8582 - loss: 0.4648
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8888 - loss: 0.3782
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8913 - loss: 0.3402
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.9112 - loss: 0.2496
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.9069 - loss: 0.2841
Epoch 7/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8870 - loss: 0.3009
Epoch 8/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.9233 - loss:

<keras.src.callbacks.history.History at 0x768414112b70>

In [42]:
pesos0 = rede_neural.layers[0].get_weights()
pesos0

[array([[ 7.61017762e-03, -1.31732821e-02, -6.32495806e-03,
          1.97873443e-01,  1.39745981e-01,  2.46365108e-02,
          1.68841369e-02,  1.25951655e-02,  1.59086883e-01,
          1.87086947e-02,  1.46053538e-01,  8.50466713e-02,
         -4.59894128e-02, -2.48945817e-01, -7.49368221e-02,
         -7.03911856e-03],
        [ 7.15040639e-02, -3.85849588e-02, -1.78318247e-02,
          3.06408927e-02,  1.02362260e-02, -1.41081214e-02,
         -3.73747461e-02, -4.48898673e-02,  8.25504661e-02,
          3.51707004e-02,  7.93737099e-02, -6.36045858e-02,
          2.08509658e-02, -1.34726524e-01,  5.65358028e-02,
          4.01176699e-02],
        [-7.31469393e-02, -2.42707375e-02, -2.82674320e-02,
          1.80374191e-03, -7.88178220e-02,  2.09580921e-02,
          2.93053687e-04, -1.82827935e-02,  1.16197437e-01,
         -3.21643949e-02, -1.27997786e-01,  5.62058575e-02,
          2.60774419e-02,  1.20503746e-01, -2.53951624e-02,
          2.70516798e-03],
        [-3.2773513

In [43]:
len(pesos0)

2

In [44]:
len(pesos0[1])

16

In [45]:
len(pesos0[0])

30

In [46]:
pesos1 = rede_neural.layers[1].get_weights()
pesos1

[array([[-0.01643307, -0.04466858, -0.00218125,  0.07923651, -0.08297267,
          0.03281034, -0.00181834,  0.02643207, -0.02482375,  0.02532833,
         -0.00316911,  0.10959738,  0.12622426, -0.01234752, -0.02937013,
         -0.08749118],
        [ 0.0212589 ,  0.0445361 , -0.01475208, -0.04476358,  0.02607987,
          0.01080777,  0.03799057, -0.03814541,  0.02910647, -0.02047763,
          0.0069708 , -0.04137192, -0.00870334,  0.03256024, -0.03569018,
          0.03137947],
        [ 0.01819737,  0.04745078,  0.0134138 , -0.03534054, -0.01005059,
          0.01541314,  0.04221456, -0.01630113, -0.04958943, -0.0239962 ,
          0.02038172, -0.03731878, -0.03608631, -0.00544555,  0.00899074,
          0.04651492],
        [-0.0859156 ,  0.03830175, -0.01895658,  0.15305361, -0.09126256,
         -0.03691082,  0.00205725,  0.02834872, -0.04078498,  0.01171153,
         -0.14223239,  0.06899986,  0.15126926,  0.03858669, -0.04888933,
         -0.05630698],
        [ 0.00953889

In [48]:
pesos2 = rede_neural.layers[2].get_weights()
pesos2

[array([[-0.09280916],
        [-0.46366566],
        [-0.32861027],
        [ 0.77575207],
        [-0.3725118 ],
        [-0.46749794],
        [-0.40108794],
        [-0.01423483],
        [-0.21385767],
        [-0.17936775],
        [-0.09740965],
        [ 0.69876176],
        [ 0.64251685],
        [ 0.04384428],
        [ 0.39748663],
        [ 0.52578765]], dtype=float32),
 array([-0.22934376], dtype=float32)]

## Previsões

In [33]:
previsoes = rede_neural.predict(X_teste)
previsoes = previsoes > 0.5 # Convertendo o valor float com uma classe True/False

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


In [34]:
previsoes

array([[False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [

In [35]:
y_teste

array([1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1.,
       0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
       1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0.,
       1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 1., 1.,
       0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
       1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 1.,
       1., 1., 0., 1., 0., 0., 0.], dtype=float32)

## Comparação da base de previsão com a base de teste

In [36]:
from sklearn.metrics import accuracy_score,confusion_matrix

In [37]:
accuracy_score(y_teste, previsoes)

0.9300699300699301

In [38]:
confusion_matrix(y_teste, previsoes)

array([[79,  0],
       [10, 54]])

In [40]:
rede_neural.evaluate(X_teste,y_teste)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.9168 - loss: 0.4622 


[0.41719356179237366, 0.9300699234008789]

## Validação Cruzada

In [49]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [50]:
import pandas as pd
import tensorflow as tf
import sklearn
import scikeras

In [52]:
pd.__version__ , tf.__version__, sklearn.__version__, scikeras.__version__

('2.2.2', '2.16.1', '1.4.2', '0.13.0')

In [53]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as k

In [65]:
def criar_rede():
    k.clear_session()
    rede_neural = Sequential([
        tf.keras.layers.InputLayer(shape = (30,)), 
        tf.keras.layers.Dense(units = 16, activation = 'relu', kernel_initializer = 'random_uniform'),
        tf.keras.layers.Dropout(rate = 0.2),# Aqui é a aplicação de um Dropout - não obrigatório para avaliar a rede_neural
        tf.keras.layers.Dense(units = 16, activation = 'relu', kernel_initializer = 'random_uniform'),
        tf.keras.layers.Dropout(rate = 0.2),
        tf.keras.layers.Dense(units = 1, activation = 'sigmoid')])
    otimizador = tf.keras.optimizers.Adam(learning_rate = 0.001, clipvalue = 0.5)
    rede_neural.compile(optimizer = otimizador, loss='binary_crossentropy', metrics = ['binary_accuracy'])
    return rede_neural

In [66]:
rede_neural = KerasClassifier(model = criar_rede, epochs=100, batch_size=10)

In [67]:
resultados = cross_val_score(estimator = rede_neural, X = X , y = y, cv = 10, scoring = 'accuracy')

Epoch 1/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - binary_accuracy: 0.5851 - loss: 0.8586
Epoch 2/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.7564 - loss: 0.5259
Epoch 3/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.7696 - loss: 0.5054
Epoch 4/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8358 - loss: 0.4104
Epoch 5/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8682 - loss: 0.3441
Epoch 6/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8413 - loss: 0.3872
Epoch 7/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8822 - loss: 0.3416
Epoch 8/100
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - binary_accuracy: 0.8705 - loss:

In [74]:
# Sem Dropout
resultados

array([0.94736842, 0.85964912, 0.92982456, 0.94736842, 0.9122807 ,
       0.9122807 , 0.96491228, 0.92982456, 0.9122807 , 0.98214286])

In [75]:
# Sem Dropout
resultados.mean()

0.9297932330827068

In [80]:
# Sem Dropout
resultados.std()

0.032298404819953656

In [77]:
# Com Dropout
resultados

array([0.94736842, 0.85964912, 0.92982456, 0.94736842, 0.9122807 ,
       0.9122807 , 0.96491228, 0.92982456, 0.9122807 , 0.98214286])

In [78]:
# Com Dropout
resultados.mean()

0.9297932330827068

In [None]:
# Com Dropout
resultados.std()