# Classificação binária - Brest Cancer Simples

Base de dados: https://archive.ics.uci.edu/dataset/17/breast+cancer+winsconsin+diagnostic

## Importando base de dados

In [29]:
import pandas as pd

In [30]:
# previsores 
X = pd.read_csv('2_cancer_entradas.csv')
X

Unnamed: 0,radius1,texture1,perimeter1,area1,smoothness1,compactness1,concavity1,concave_points1,symmetry1,fractal_dimension1,...,radius3,texture3,perimeter3,area3,smoothness3,compactness3,concavity3,concave_points3,symmetry3,fractal_dimension3
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [31]:
# saidas 
y = pd.read_csv('2_cancer_saidas.csv')
y

Unnamed: 0,Diagnosis
0,M
1,M
2,M
3,M
4,M
...,...
564,M
565,M
566,M
567,M


## Divisão base de treino e teste

In [32]:
import sklearn 
from sklearn.model_selection import train_test_split
sklearn.__version__

'1.4.2'

In [33]:
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(X,y,test_size=0.25)

In [34]:
X_treinamento.shape , y_treinamento.shape

((426, 30), (426, 1))

In [35]:
X_teste.shape , y_teste.shape

((143, 30), (143, 1))

## Estrutura da Rede Neural

In [36]:
# Importacao desta lib para desativar erro no TensorFlow
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [37]:
# !pip uninstall -y tensorflow
!pip install -q tensorflow==2.16.1
import tensorflow as tf
from tensorflow.keras.models import Sequential
tf.__version__

'2.16.1'

In [38]:

# shape - numero de colunas do dataframe
# units - para calcular units, depende da quantidade de colunas (30) e de saidas (1) Pois é uma classificação binária
#
#    (30 + 1) / 2 = 15.5 => 16
#

rede_neural = Sequential([
     tf.keras.layers.InputLayer(shape = (30,)),
     tf.keras.layers.Dense(units = 16, activation = 'relu', kernel_initializer = 'random_uniform'),
     tf.keras.layers.Dense(units = 1, activation = 'sigmoid')
])

In [39]:
rede_neural.summary()

## Tratando a base de dados

É necessário converter os dados categóricos nominais em ordinais

In [57]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Converte os rótulos para numérico (0 e 1)
label_encoder = LabelEncoder()
y_treinamento = label_encoder.fit_transform(y_treinamento)
y_teste = label_encoder.fit_transform(y_teste)

# Converte X_treinamento para float32 se ainda não estiver
X_treinamento = np.array(X_treinamento, dtype=np.float32)
y_treinamento = np.array(y_treinamento, dtype=np.float32)
y_teste = np.array(y_teste, dtype=np.float32)

## Configuração e execução da rede neural

In [46]:
rede_neural.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['binary_accuracy'])

In [47]:
rede_neural.fit(X_treinamento, y_treinamento, batch_size = 10, epochs = 100)

Epoch 1/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - binary_accuracy: 0.6136 - loss: 11.7533
Epoch 2/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.5195 - loss: 0.7109 
Epoch 3/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.5161 - loss: 0.6970 
Epoch 4/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6064 - loss: 0.6907
Epoch 5/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6840 - loss: 0.6833
Epoch 6/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.8164 - loss: 0.6730
Epoch 7/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6273 - loss: 0.6515
Epoch 8/100
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - binary_accuracy: 0.6725 - lo

<keras.src.callbacks.history.History at 0x7c07db519f70>

## Previsões

In [52]:
previsoes = rede_neural.predict(X_teste)
previsoes = previsoes > 0.5 # Convertendo o valor float com uma classe True/False

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [53]:
previsoes

array([[ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [

In [58]:
y_teste

array([1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0.,
       1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1.,
       0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0.,
       1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1.,
       1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 1., 1., 0.], dtype=float32)

## Comparação da base de previsão com a base de teste

In [59]:
from sklearn.metrics import accuracy_score,confusion_matrix

In [60]:
accuracy_score(y_teste, previsoes)

0.9020979020979021

In [61]:
confusion_matrix(y_teste, previsoes)

array([[83,  5],
       [ 9, 46]])

In [62]:
rede_neural.evaluate(X_teste,y_teste)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - binary_accuracy: 0.9205 - loss: 0.1968 


[0.22801123559474945, 0.9020978808403015]