# Project 2: Binary classification brest cancer with cross validation and dropout

# Step 1: Libraries

In [None]:
!pip install skorch

Collecting skorch
  Downloading skorch-0.15.0-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.3/239.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m


In [5]:
import pandas as pd
import numpy as np
import torch.nn as nn
from skorch import NeuralNetBinaryClassifier
import torch
from sklearn.model_selection import cross_val_score
import seaborn as sns

# Step 2

Database

In [3]:
np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x7b17f71ceeb0>

In [4]:
forecasters = pd.read_csv('/content/entradas_breast.csv')
labels = pd.read_csv('/content/saidas_breast.csv')

In [9]:
forecasters = np.array(forecasters, dtype='float32')
labels = np.array(labels, dtype='float32').squeeze(1)

In [10]:
labels.shape

(569,)

In [11]:
type(forecasters)

numpy.ndarray

In [12]:
type(labels)

numpy.ndarray

# Step 3

class for neural network structure

In [19]:
class classifier_torch(nn.Module):
  def __init__(self):
    super().__init__()

    # 30 -> 16 -> 16 -> 1
    self.dense0 = nn.Linear(30, 16)
    torch.nn.init.uniform_(self.dense0.weight)
    self.activation0 = nn.ReLU()

    self.dense1 = nn.Linear(16, 16)
    torch.nn.init.uniform_(self.dense1.weight)
    self.activation1 = nn.ReLU()

    self.dense2 = nn.Linear(16,1)
    torch.nn.init.uniform_(self.dense2.weight)
    self.output = nn.Sigmoid()

  def forward(self, X):
      # X -> input

    X = self.dense0(X)
    X = self.activation0(X)

    X = self.dense1(X)
    X = self.activation1(X)

    X = self.dense2(X)
    X = self.output(X)

    return X

# Step 4

Skorch

In [20]:
sklearn_classifier = NeuralNetBinaryClassifier(module=classifier_torch,
                                               criterion=torch.nn.BCELoss,
                                               optimizer=torch.optim.Adam,
                                               lr=0.001,
                                               optimizer__weight_decay=0.0001,
                                               max_epochs=100,
                                               batch_size=10,
                                               train_split=False)

# Step 5

Cross Validation

In [21]:
result = cross_val_score(sklearn_classifier, forecasters, labels, cv=10, scoring= 'accuracy' )

  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.1094[0m  0.1844
      2       37.1094  0.0974
      3       37.1094  0.0871
      4       37.1094  0.0828
      5       37.1094  0.0913
      6       37.1094  0.0876
      7       37.1094  0.0963
      8       37.1094  0.0991
      9       37.1094  0.0934
     10       37.1094  0.0922
     11       37.1094  0.0877
     12       37.1094  0.0890
     13       37.1094  0.0855
     14       37.1094  0.0838
     15       37.1094  0.0921
     16       37.1094  0.0838
     17       37.1094  0.0914
     18       37.1094  0.0921
     19       37.1094  0.1045
     20       37.1094  0.0846
     21       37.1094  0.0832
     22       37.1094  0.0864
     23       37.1094  0.0907
     24       37.1094  0.0932
     25       [36m12.2332[0m  0.0964
     26        [36m0.5179[0m  0.0851
     27        [36m0.5031[0m  0.0833
     28        [36m0.4697[0m  0.0867
     29        [36m0.4653[0m  0.0866
     30        

In [22]:
result

array([0.87719298, 0.80701754, 0.87719298, 0.63157895, 0.85964912,
       0.92982456, 0.85964912, 0.92982456, 0.89473684, 0.625     ])

In [23]:
mean_ = result.mean()
mean_

0.8291666666666666

In [24]:
std_ = result.std()
std_

0.10594829114735756

# Step 6

Dropout

In [25]:
class classifier_torch(nn.Module):
  def __init__(self):
    super().__init__()

    # 30 -> 16 -> 16 -> 1
    self.dense0 = nn.Linear(30, 16)
    torch.nn.init.uniform_(self.dense0.weight)
    self.activation0 = nn.ReLU()
    self.dropout0 = nn.Dropout(0.2)

    self.dense1 = nn.Linear(16, 16)
    torch.nn.init.uniform_(self.dense1.weight)
    self.activation1 = nn.ReLU()
    self.dropout1 = nn.Dropout(0.2)


    self.dense2 = nn.Linear(16,1)
    torch.nn.init.uniform_(self.dense2.weight)
    self.output = nn.Sigmoid()

  def forward(self, X):
      # X -> input

    X = self.dense0(X)
    X = self.activation0(X)
    X = self.dropout0(X)

    X = self.dense1(X)
    X = self.activation1(X)
    X = self.dropout1(X)

    X = self.dense2(X)
    X = self.output(X)

    return X

In [26]:
sklearn_classifier = NeuralNetBinaryClassifier(module=classifier_torch,
                                               criterion=torch.nn.BCELoss,
                                               optimizer=torch.optim.Adam,
                                               lr=0.001,
                                               optimizer__weight_decay=0.0001,
                                               max_epochs=100,
                                               batch_size=10,
                                               train_split=False)

In [27]:
result = cross_val_score(sklearn_classifier, forecasters, labels, cv=10, scoring= 'accuracy' )

  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.1094[0m  0.1984
      2       37.1094  0.0943
      3       37.1094  0.0953
      4       37.1094  0.0918
      5       37.1094  0.1011
      6       37.1094  0.1102
      7       37.1094  0.0994
      8       37.1094  0.0975
      9       37.1094  0.0979
     10       37.1094  0.0996
     11       37.1094  0.0899
     12       37.1094  0.0883
     13       37.1094  0.0946
     14       37.1094  0.0924
     15       37.1094  0.0949
     16       37.1094  0.0969
     17       37.1094  0.1145
     18       37.1094  0.0923
     19       37.1094  0.0919
     20       37.1094  0.0954
     21       37.1094  0.0976
     22       37.1094  0.0931
     23       37.1094  0.1025
     24       37.1094  0.0955
     25       37.1094  0.1017
     26       37.1094  0.0893
     27       37.1094  0.1139
     28       [36m12.1185[0m  0.0942
     29        [36m0.6426[0m  0.1156
     30        [36m0.5615[0m  0.0995
    

In [28]:
result

array([0.84210526, 0.80701754, 0.85964912, 0.96491228, 0.85964912,
       0.92982456, 0.63157895, 0.92982456, 0.89473684, 0.875     ])

In [30]:
mean_ = result.mean()
mean_

0.8594298245614034

In [31]:
std_ = result.std()
std_

0.08802823886220076