In [7]:
import torch
import pandas as pd
import numpy as np

In [8]:
# Fraction of the shuffled rows that goes into the train + validation split;
# the remaining rows form the test split.
train_perc = 0.8

In [9]:
# Load the data set. Column 0 is used as the index (so it is not a feature) and
# "?" placeholders are parsed as missing values straight away, which keeps every
# column numeric instead of falling back to strings.
X_and_Y = pd.read_csv('breast-cancer-wisconsin.data',
                      delimiter=',',
                      header=None,
                      index_col=0,
                      na_values='?')

X_and_Y.columns = [
    'Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

# Clean data by dropping rows that contain a missing value.
X_and_Y = X_and_Y.dropna()

# Get the values from pandas as one float array (single conversion, done once).
X_and_Y = X_and_Y.values.astype(float)

# Shuffle the rows with a dedicated, seeded generator so that the train/test
# split is identical on every run and the global NumPy RNG is left untouched.
np.random.RandomState(0).shuffle(X_and_Y)

# Separate X and Y
X = X_and_Y[:, 0:-1]         # First column to second last column: Features (numerical values)
Y = X_and_Y[:, -1]           # Last column: raw class codes (2 or 4), remapped below
print(X.shape, Y.shape)      # Check the shapes.

# Remap the class codes to binary labels: 2 -> 0, 4 -> 1.
Y[Y == 2] = 0
Y[Y == 4] = 1

# Index of the first test row; everything before it is train + val.
n_train_val = int(train_perc * len(X))

X_train_val = X[:n_train_val]  # Get features from train + val set.
X_test      = X[n_train_val:]  # Get features from test set.
Y_train_val = Y[:n_train_val]  # Get labels from train + val set.
Y_test      = Y[n_train_val:]  # Get labels from test set.
print(X_train_val.shape, X_test.shape, Y_train_val.shape, Y_test.shape)

# Convert every split to a float32 torch tensor.
X_test = torch.FloatTensor(X_test)
Y_test = torch.FloatTensor(Y_test)
X_train_val = torch.FloatTensor(X_train_val)
Y_train_val = torch.FloatTensor(Y_train_val)

(683, 9) (683,)
(546, 9) (137, 9) (546,) (137,)


In [111]:
class Feedforward(torch.nn.Module):
    """Fully connected network with one hidden layer and a sigmoid output.

    Layers, in order: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> 1), Sigmoid.

    Args:
        input_size: width of the last dimension of the input.
        hidden_size: number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through both layers; the result has a trailing dimension of size 1."""
        activated = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(activated))

In [124]:
# Seed torch's global RNG before the layers are created so that the random
# weight initialisation, and every loss/accuracy figure derived from it, is the
# same on each fresh run of the notebook.
torch.manual_seed(0)

# One hidden layer of 10 units; the input width is read from the feature matrix.
model = Feedforward(X_train_val.shape[1], 10)
# Binary cross-entropy on probabilities (the model already ends in a sigmoid).
criterion = torch.nn.BCELoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [125]:
# Baseline: loss of the still-untrained model on the test split.
model.eval()
with torch.no_grad():  # evaluation only, so skip building the autograd graph
    y_pred = model(X_test)
    # Drop only the trailing unit dimension, (N, 1) -> (N,). A bare squeeze()
    # would also collapse the batch dimension when N == 1 and no longer match
    # the shape of Y_test.
    before_train = criterion(y_pred.squeeze(-1), Y_test)
print('Test loss before training' , before_train.item())

Test loss before training 0.6042898893356323


In [126]:
# Full-batch training: every epoch is one gradient step on the whole
# train + val split.
model.train()
n_epochs = 100  # kept separate from the loop variable so the count is never overwritten
for epoch in range(n_epochs):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(X_train_val)
    # Compute loss; squeeze only the trailing unit dimension, (N, 1) -> (N,),
    # so the shape still matches the targets when N == 1.
    loss = criterion(y_pred.squeeze(-1), Y_train_val)

    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

Epoch 0: train loss: 0.5988958477973938
Epoch 1: train loss: 0.5634580254554749
Epoch 2: train loss: 0.5363854765892029
Epoch 3: train loss: 0.5126324892044067
Epoch 4: train loss: 0.49141940474510193
Epoch 5: train loss: 0.4723828136920929
Epoch 6: train loss: 0.455081045627594
Epoch 7: train loss: 0.4396416246891022
Epoch 8: train loss: 0.4258510172367096
Epoch 9: train loss: 0.41326791048049927
Epoch 10: train loss: 0.4015505313873291
Epoch 11: train loss: 0.390625536441803
Epoch 12: train loss: 0.38046425580978394
Epoch 13: train loss: 0.3710257411003113
Epoch 14: train loss: 0.3621004521846771
Epoch 15: train loss: 0.35368120670318604
Epoch 16: train loss: 0.3457433879375458
Epoch 17: train loss: 0.3382556736469269
Epoch 18: train loss: 0.33116021752357483
Epoch 19: train loss: 0.3244476914405823
Epoch 20: train loss: 0.31809431314468384
Epoch 21: train loss: 0.31207722425460815
Epoch 22: train loss: 0.3063555359840393
Epoch 23: train loss: 0.3009171187877655
Epoch 24: train loss:

In [127]:
# Evaluate the trained model on the test split.
model.eval()
with torch.no_grad():  # inference only, so skip building the autograd graph
    y_pred = model(X_test)

# Threshold the predicted probabilities at 0.5 in one vectorised step.
# squeeze(-1) drops only the trailing unit dimension, (N, 1) -> (N,).
predicted = (y_pred.squeeze(-1) > 0.5).float()
translated = predicted.tolist()
print(translated)
print(Y_test)

# Number of test samples whose thresholded prediction equals the label.
correct = int((predicted == Y_test).sum().item())

# Report every misclassified sample.
for i, val in enumerate(translated):
    if val != Y_test[i]:
        print("Index: {} is wrong. Expected: {} Output: {}".format(i, Y_test[i], val))

print("Accuracy: {}".format(correct / len(translated)))


[0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0]
tensor([0., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
        0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
        1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1.