In [1]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import LabelEncoder

In [24]:
# Hyper-parameters (network shape first, then optimisation settings)
input_size = 30        # width of the first linear layer; BCData slices 30 feature columns
hidden_size = 20       # width of both hidden layers
num_classes = 2        # width of the output layer
num_epochs = 10        # full passes over the training loader
batch_size = 100       # samples per mini-batch for both loaders
learning_rate = 1e-3   # step size handed to the optimizer

In [25]:
# Load the raw table; the file has no header row, so pandas numbers the columns 0, 1, 2, ...
df = pd.read_csv("wdbc.data", header=None)

# The first column is the patient ID, which carries no predictive signal: drop it
df = df.drop(columns=df.columns[0])

# Column 1 holds the categorical label; LabelEncoder replaces each distinct
# value with an integer code (presumably two classes here -- confirm against the data)
categorical_features = [1]
label_encoder = LabelEncoder()
df[1] = label_encoder.fit_transform(df[1])


In [26]:
# Dataset wrapper around the breast cancer dataframe
class BCData(Dataset):
    """Dataset that serves (features, label) pairs read from the breast cancer dataframe"""

    def __init__(self, data):
        """
        Args:
            data: The breast cancer pandas dataframe, with the label in column
                position 0 and the feature values in column positions 1 to 30
        """
        self.main = data
        self.n = len(data)

    def __len__(self):
        """Number of rows the dataframe had when the dataset was created."""
        return self.n

    def __getitem__(self, idx):
        """
        Args:
            idx: Positional row index into the dataframe

        Returns:
            Tuple ``(features, label)``: ``features`` is a float64 tensor built
            from column positions 1 to 30 of the row, ``label`` is the value
            stored at column position 0 of the same row.
        """
        # Label and features are looked up separately so the label keeps the
        # dtype of its own column instead of being upcast with the row.
        label = self.main.iloc[idx, 0]
        feature_values = self.main.iloc[idx, 1:31].to_numpy().astype('double')
        return torch.from_numpy(feature_values), label

In [27]:
# Wrap the prepared dataframe so it can be consumed by PyTorch data loaders
dataset = BCData(data=df)

In [28]:
# Hold-out settings: fraction reserved for testing, whether to shuffle, and the RNG seed
percent_split = 0.2
shuffle_dataset = True
random_seed = 5

# Build the list of row positions and work out how many go to the held-out part
dataset_size = len(dataset)
split = int(np.floor(percent_split * dataset_size))
indices = list(range(dataset_size))
if shuffle_dataset:
    # Seeding NumPy's global RNG right before the shuffle fixes the split
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# The first `split` positions are held out; everything after them is used for training
val_indices = indices[:split]
train_indices = indices[split:]

# Both loaders read from the same dataset; the samplers restrict each one to its own rows
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

In [29]:
# Device configuration: run on the GPU through CUDA when one is available, else on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [30]:
# Neural Network Class
class NeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU after the first two.

    The output of the last linear layer is returned as-is (no softmax), which is
    the form ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, input_size, hidden_size, num_classes, seed=10):
        """
        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of both hidden layers.
            num_classes: Number of output scores per sample.
            seed: Value passed to ``torch.manual_seed`` before the layers are
                created so that weight initialisation is reproducible. Note
                that this reseeds PyTorch's *global* RNG. Pass ``None`` to
                leave the global RNG untouched.
        """
        super().__init__()
        if seed is not None:
            torch.manual_seed(seed)  # For reproducibility
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        # ReLU holds no parameters, so one instance serves both hidden layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten each sample to one dimension and return the output scores.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened into the feature dimension.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        x = x.view(x.size(0), -1)
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        return self.fc3(out)

In [31]:
# Build the network, then cast its parameters to float64 (the dataset yields
# float64 feature tensors) and move it to the selected device
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.double().to(device)

In [36]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Defined up front so the accuracy report below still works when num_epochs == 0
correct = 0
total = 0

# Set start time (perf_counter is a monotonic clock meant for measuring elapsed time)
start = time.perf_counter()

for epoch in range(num_epochs):
    train_loss, test_loss = [], []
    # Reset every epoch: the accuracy printed at the end describes the last epoch only
    correct = 0
    total = 0

    # Train the model
    model.train()
    for features, labels in train_loader:
        # Move tensors to the configured device; CrossEntropyLoss needs int64 targets
        features = features.to(device)
        labels = labels.to(device=device, dtype=torch.int64)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    # Evaluate on the held-out split without tracking gradients
    model.eval()
    with torch.no_grad():
        for features, labels in test_loader:
            features = features.to(device)
            labels = labels.to(device=device, dtype=torch.int64)
            outputs = model(features)
            batch_loss = criterion(outputs, labels)
            # Predicted class = index of the largest output score
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            test_loss.append(batch_loss.item())

    # Reported losses are plain means over batches (not weighted by batch size)
    print ("Epoch:", epoch + 1, ", Training Loss: ", np.mean(train_loss), ", Test loss: ", np.mean(test_loss))

# Set end time
end = time.perf_counter()

# Guard against an empty evaluation split instead of dividing by zero
accuracy = 100 * correct / total if total else float('nan')
print('Accuracy is: {} %'.format(accuracy))
print('It takes ' +  str(end - start)  + ' seconds to run Feedforward Neural Network algorithm on breast cancer dataset.')

Epoch: 1 , Training Loss:  0.6845908738007255 , Test loss:  0.6750081612212686
Epoch: 2 , Training Loss:  0.6727100685713361 , Test loss:  0.6615619370427674
Epoch: 3 , Training Loss:  0.6615446190171506 , Test loss:  0.6512931140249234
Epoch: 4 , Training Loss:  0.6508305675936852 , Test loss:  0.641615979101956
Epoch: 5 , Training Loss:  0.6392863536993544 , Test loss:  0.6311188779016845
Epoch: 6 , Training Loss:  0.6260670234971829 , Test loss:  0.6030209288307431
Epoch: 7 , Training Loss:  0.6130074190977727 , Test loss:  0.5994383105527462
Epoch: 8 , Training Loss:  0.5977328445541845 , Test loss:  0.5697127597411669
Epoch: 9 , Training Loss:  0.5789165722592146 , Test loss:  0.5719134975230908
Epoch: 10 , Training Loss:  0.5598552279867224 , Test loss:  0.523098688448268
Accuracy is: 86.72566371681415 %
It takes 2.290900945663452 seconds to run Feedforward Neural Network algorithm on breast cancer dataset.
