In [2]:
import torch
from torch import nn
import pandas as pd
import numpy as np

# Preparando o dataset 

In [4]:
data = pd.read_csv('../datasets/breast-cancer.csv')

In [5]:
data.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [6]:
X = data.drop(['Unnamed: 32', 'id', 'diagnosis'], axis=1)
y = data['diagnosis']

In [7]:
y = y.apply(lambda x: 1 if x == 'M' else 0)

In [8]:
y

0      1
1      1
2      1
3      1
4      1
      ..
564    1
565    1
566    1
567    1
568    0
Name: diagnosis, Length: 569, dtype: int64

In [9]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=99)

In [12]:
def pandas2torch(df):
    np_array = np.array(df)
    return torch.tensor(np_array, dtype=torch.float)

In [13]:
np.array(X_train)

array([[1.404e+01, 1.598e+01, 8.978e+01, ..., 7.453e-02, 2.725e-01,
        7.234e-02],
       [1.223e+01, 1.956e+01, 7.854e+01, ..., 1.080e-01, 2.668e-01,
        8.174e-02],
       [1.371e+01, 2.083e+01, 9.020e+01, ..., 1.556e-01, 3.196e-01,
        1.151e-01],
       ...,
       [1.747e+01, 2.468e+01, 1.161e+02, ..., 1.721e-01, 2.160e-01,
        9.300e-02],
       [1.008e+01, 1.511e+01, 6.376e+01, ..., 1.042e-02, 2.933e-01,
        7.697e-02],
       [1.674e+01, 2.159e+01, 1.101e+02, ..., 1.813e-01, 4.863e-01,
        8.633e-02]])

In [14]:
X_train_torch = pandas2torch(X_train)
X_test_torch = pandas2torch(X_test)
y_train_torch = pandas2torch(y_train)
y_test_torch = pandas2torch(y_test)

In [15]:
type(X_train_torch)

torch.Tensor

In [16]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [17]:
dataset = TensorDataset(X_train_torch, y_train_torch)

In [18]:
dataset

<torch.utils.data.dataset.TensorDataset at 0x7f411d221a80>

In [19]:
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [20]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f411d221780>

## Arquitetura da rede neural

In [64]:
classificador1 = nn.Sequential(
    nn.Linear(in_features=30, out_features=16),
    nn.ReLU(),
    nn.Linear(in_features=16, out_features=16),
    nn.ReLU(),
    nn.Linear(16, 1),
    nn.Sigmoid()
)

In [65]:
classificador1

Sequential(
  (0): Linear(in_features=30, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=16, bias=True)
  (3): ReLU()
  (4): Linear(in_features=16, out_features=1, bias=True)
  (5): Sigmoid()
)

In [66]:
from torchinfo import summary

In [67]:
summary(classificador1)

Layer (type:depth-idx)                   Param #
Sequential                               --
├─Linear: 1-1                            496
├─ReLU: 1-2                              --
├─Linear: 1-3                            272
├─ReLU: 1-4                              --
├─Linear: 1-5                            17
├─Sigmoid: 1-6                           --
Total params: 785
Trainable params: 785
Non-trainable params: 0

In [68]:
# função de erro (Bynary cross entropy)
error_fn = nn.BCELoss()

In [69]:
optimizer = torch.optim.Adam(classificador1.parameters(), lr=0.001, weight_decay=0.0001)

## Treinamento da rede neural

#### Sem POO

In [70]:
epocas = 100

In [71]:
train_loader.batch_size

32

In [72]:
for epoch in range(epocas):
    running_loss = 0.

    for batch in train_loader:
        inputs, labels = batch
        optimizer.zero_grad()

        outputs = classificador1(inputs) #classificador1.foward(inputs)
        outputs = outputs.squeeze()
        loss = error_fn(outputs, labels)
        
        #backpropagation
        loss.backward()

        # Atualizar os pesos
        optimizer.step()

        # erro acumulado
        running_loss += loss.item()

    print(f'Época {epoch+1}: loss {running_loss/len(train_loader)}')

Época 1: loss 0.7504502495129903
Época 2: loss 0.4212428500254949
Época 3: loss 0.3826503892739614
Época 4: loss 0.3668994704882304
Época 5: loss 0.3282494793335597
Época 6: loss 0.2970696707566579
Época 7: loss 0.28724365234375
Época 8: loss 0.27739868859450023
Época 9: loss 0.2621150160829226
Época 10: loss 0.2862500528494517
Época 11: loss 0.25355974783500035
Época 12: loss 0.24083938995997112
Época 13: loss 0.23678288459777833
Época 14: loss 0.2470708558956782
Época 15: loss 0.24253431856632232
Época 16: loss 0.24386279334624608
Época 17: loss 0.25215415805578234
Época 18: loss 0.2267906015117963
Época 19: loss 0.2900180548429489
Época 20: loss 0.23332903484503428
Época 21: loss 0.2505473126967748
Época 22: loss 0.2504628265897433
Época 23: loss 0.2506648520628611
Época 24: loss 0.2394290491938591
Época 25: loss 0.23960798730452856
Época 26: loss 0.22293815712134044
Época 27: loss 0.2431792547305425
Época 28: loss 0.22091798335313798
Época 29: loss 0.21987516656517983
Época 30: los

In [73]:
# Armazenando pesos
params = list(classificador1.parameters())

In [74]:
params

[Parameter containing:
 tensor([[ 5.8210e-02,  1.5940e-01, -2.2470e-01,  6.7227e-02, -1.3471e-01,
           4.8025e-01,  6.3897e-01,  6.9568e-01, -1.3029e-01,  1.5350e-02,
           1.1264e-02,  1.2448e-01,  2.6802e-01, -7.5626e-02,  9.0033e-02,
           4.4053e-01,  5.7423e-01,  2.2049e-01, -3.7639e-02,  9.0409e-02,
          -2.8248e-02, -1.0662e-02,  7.3819e-02, -1.8940e-02,  1.0925e-02,
           5.6175e-01,  6.4616e-01,  5.5449e-01, -2.0860e-02,  8.9261e-02],
         [-1.1393e-02, -1.0725e-02, -1.4767e-02, -1.3558e-02,  5.5076e-29,
           9.0550e-28, -1.0136e-30, -7.2806e-23, -4.4835e-24,  2.1362e-29,
          -7.4439e-11, -6.6103e-05, -6.8057e-04,  2.2944e-03, -7.7589e-26,
           1.9716e-30, -2.1780e-29, -3.5707e-30,  2.3161e-30,  1.5452e-30,
          -1.6128e-02, -1.0009e-02, -1.5657e-02, -1.6093e-02, -4.1885e-25,
          -4.7154e-20,  2.1420e-18, -8.4909e-29, -6.8577e-15, -3.9747e-29],
         [-7.3259e-03, -5.0641e-03, -6.9831e-03, -4.0847e-03,  1.0926e-09,


## Avaliando o modelo

In [32]:
classificador1.eval()

Sequential(
  (0): Linear(in_features=30, out_features=16, bias=True)
  (1): ReLU()
  (2): Linear(in_features=16, out_features=16, bias=True)
  (3): ReLU()
  (4): Linear(in_features=16, out_features=1, bias=True)
  (5): Sigmoid()
)

In [33]:
y_pred = classificador1(X_test_torch)

In [34]:
y_pred.size()

torch.Size([114, 1])

In [35]:
y_pred = y_pred.detach().numpy()

In [36]:
y_pred = np.array(y_pred > 0.5)

In [37]:
y_pred

array([[False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [

In [39]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.956140350877193

## Modelo com POO

In [55]:
class NeuralNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.layers = nn.Sequential(
            nn.Linear(in_features=30, out_features=16),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=16, out_features=16),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, X):
        return self.layers(X)

In [56]:
classificador2 = NeuralNet()

In [57]:
summary(classificador2)

Layer (type:depth-idx)                   Param #
NeuralNet                                --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       496
│    └─ReLU: 2-2                         --
│    └─Dropout: 2-3                      --
│    └─Linear: 2-4                       272
│    └─ReLU: 2-5                         --
│    └─Dropout: 2-6                      --
│    └─Linear: 2-7                       17
│    └─Sigmoid: 2-8                      --
Total params: 785
Trainable params: 785
Non-trainable params: 0

In [58]:
for epoch in range(epocas):
    running_loss = 0.

    for batch in train_loader:
        inputs, labels = batch
        optimizer.zero_grad()

        outputs = classificador2(X=inputs) #classificador1.foward(inputs)
        outputs = outputs.squeeze()
        loss = error_fn(outputs, labels)
        
        #backpropagation
        loss.backward()

        # Atualizar os pesos
        optimizer.step()

        # erro acumulado
        running_loss += loss.item()

    print(f'Época {epoch+1}: loss {running_loss/len(train_loader)}')

Época 1: loss 23.570593388875327
Época 2: loss 21.752505493164062
Época 3: loss 23.240208435058594
Época 4: loss 22.616341145833335
Época 5: loss 23.387231190999348
Época 6: loss 22.68624407450358
Época 7: loss 21.61809533437093
Época 8: loss 23.741385968526206
Época 9: loss 23.14932200113932
Época 10: loss 22.21875508626302
Época 11: loss 21.327874183654785
Época 12: loss 22.791946347554525
Época 13: loss 23.25377960205078
Época 14: loss 22.58102518717448
Época 15: loss 21.705133777626905
Época 16: loss 21.14740956624349
Época 17: loss 23.231599617004395
Época 18: loss 23.275274912516277
Época 19: loss 22.5814723332723
Época 20: loss 24.548191833496094
Época 21: loss 23.966707928975424
Época 22: loss 24.034313074747722
Época 23: loss 22.85200792948405
Época 24: loss 22.22448091506958
Época 25: loss 22.329011917114258
Época 26: loss 23.541843986511232
Época 27: loss 22.31293487548828
Época 28: loss 20.9810884475708
Época 29: loss 22.68082440694173
Época 30: loss 23.03684794108073
Época

In [59]:
classificador2.eval()

NeuralNet(
  (layers): Sequential(
    (0): Linear(in_features=30, out_features=16, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=16, out_features=16, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=16, out_features=1, bias=True)
    (7): Sigmoid()
  )
)

In [60]:
y_pred = classificador2(X_test_torch)

In [61]:
y_pred = y_pred.detach().numpy()

In [62]:
y_pred = y_pred > 0.5

In [63]:
accuracy_score(y_test, y_pred)

0.6666666666666666