In [1]:
import torch
import pandas as pd
import numpy as np

In [2]:
train_perc = 0.8

In [3]:
X_and_Y = pd.read_csv('breast-cancer-wisconsin.data',
                                        delimiter=',',
                                        header=None,
                                        index_col=0)

X_and_Y.columns = ['Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class']

# Clean data by dropping values with missing data
X_and_Y = X_and_Y.replace("?", np.nan)
X_and_Y = X_and_Y.dropna()

# Get values from pandas
X_and_Y = X_and_Y.values

# Separate X and Y
np.random.shuffle(X_and_Y)   # Shuffle the data.
X = X_and_Y[:, 0:-1]         # First column to second last column: Features (numerical values)
Y = X_and_Y[:, -1]           # Last column: Labels (0 or 1)
print(X.shape, Y.shape)      # Check the shapes.

Y[Y == 2] = 0
Y[Y == 4] = 1


X_train_val = X[:int(train_perc*len(X))] # Get features from train + val set.
X_test      = X[int(train_perc*len(X)):] # Get features from test set.
Y_train_val = Y[:int(train_perc*len(Y))] # Get labels from train + val set.
Y_test      = Y[int(train_perc*len(Y)):] # Get labels from test set.
print(X_train_val.shape, X_test.shape, Y_train_val.shape, Y_test.shape)

X_test = X_test.astype(float)
X_test = torch.FloatTensor(X_test)
Y_test = Y_test.astype(float)
Y_test = torch.FloatTensor(Y_test)

X_train_val = X_train_val.astype(float)
X_train_val = torch.FloatTensor(X_train_val)
Y_train_val = Y_train_val.astype(float)
Y_train_val = torch.FloatTensor(Y_train_val)

(683, 9) (683,)
(546, 9) (137, 9) (546,) (137,)


In [4]:
class Feedforward(torch.nn.Module):
        def __init__(self, input_size, hidden_size):
            super(Feedforward, self).__init__()
            self.input_size = input_size
            self.hidden_size  = hidden_size
            self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
            self.relu = torch.nn.ReLU()
            self.fc2 = torch.nn.Linear(self.hidden_size, 1)
            self.sigmoid = torch.nn.Sigmoid()
        def forward(self, x):
            hidden = self.fc1(x)
            relu = self.relu(hidden)
            output = self.fc2(relu)
            output = self.sigmoid(output)
            return output

In [5]:
model = Feedforward(X_train_val.shape[1], 10)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)

In [6]:
model.eval()
y_pred = model(X_test)
before_train = criterion(y_pred.squeeze(), Y_test)
print('Test loss before training' , before_train.item())

Test loss before training 0.5457474589347839


In [7]:
model.train()
epoch = 20
for epoch in range(epoch):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(X_train_val)
    # Compute Loss
    loss = criterion(y_pred.squeeze(), Y_train_val)
   
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

Epoch 0: train loss: 0.5786882638931274
Epoch 1: train loss: 0.5784894824028015
Epoch 2: train loss: 0.5782906413078308
Epoch 3: train loss: 0.5780943036079407
Epoch 4: train loss: 0.5778982639312744
Epoch 5: train loss: 0.5777022242546082
Epoch 6: train loss: 0.5775094628334045
Epoch 7: train loss: 0.5773164629936218
Epoch 8: train loss: 0.5771243572235107
Epoch 9: train loss: 0.5769334435462952
Epoch 10: train loss: 0.5767440795898438
Epoch 11: train loss: 0.5765547156333923
Epoch 12: train loss: 0.5763673186302185
Epoch 13: train loss: 0.5761801600456238
Epoch 14: train loss: 0.5759938955307007
Epoch 15: train loss: 0.5758078694343567
Epoch 16: train loss: 0.5756235718727112
Epoch 17: train loss: 0.5754392743110657
Epoch 18: train loss: 0.5752558708190918
Epoch 19: train loss: 0.5750730037689209


In [8]:
model.eval()
y_pred = model(X_test)
# after_train = criterion(y_pred.squeeze(), Y_test) 
# print('Test loss after Training' , after_train.item())
y_pred


tensor([[0.4737],
        [0.6274],
        [0.4767],
        [0.5160],
        [0.9473],
        [0.4350],
        [0.5156],
        [0.4721],
        [0.5272],
        [0.5267],
        [0.4790],
        [0.4746],
        [0.4866],
        [0.4299],
        [0.4036],
        [0.8519],
        [0.4697],
        [0.4796],
        [0.9606],
        [0.4487],
        [0.8999],
        [0.3800],
        [0.4932],
        [0.4790],
        [0.6632],
        [0.5246],
        [0.4924],
        [0.8900],
        [0.9219],
        [0.5289],
        [0.9089],
        [0.5880],
        [0.8299],
        [0.4628],
        [0.4746],
        [0.4628],
        [0.5231],
        [0.8068],
        [0.4796],
        [0.5456],
        [0.4772],
        [0.8326],
        [0.4912],
        [0.4956],
        [0.9284],
        [0.4956],
        [0.4059],
        [0.4956],
        [0.4651],
        [0.7857],
        [0.5026],
        [0.9127],
        [0.5383],
        [0.7607],
        [0.6905],
        [0