In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn.functional as F
import torch


In [2]:
df = pd.read_csv("datasets/breast-cancer-wisconsin.csv",sep=',')
df = shuffle(df)
train_df = df[:500]
test_df = df[500:]

In [3]:
def getDS(df):
    df1 = df.iloc[:,1:10]
    df2 = df.iloc[:,10]
    y = np.array(df2)
    y[y==4]=1 # malignant
    y[y==2]=0 # benign
    X = np.array(df1)
    X = X.astype(float)
    return X,y

In [4]:
train = getDS(train_df)
test = getDS(test_df)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(9, 50)
        self.fc2 = nn.Linear(50, 2)
        self.scores = nn.Softmax()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.scores(x)
        return x

In [6]:

class CancerDataset(Dataset):
    """CancerDataset."""

    def __init__(self, X, y):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.X = X
        self.y = y

    def __len__(self):
        return (self.X.shape[0])

    def __getitem__(self, idx):
        sample = {'X': self.X[idx], 'classes': self.y[idx]}
        return sample

In [7]:
train_ds = CancerDataset(train[0],train[1])
test_ds = CancerDataset(test[0],test[1])

In [8]:
train_dataloader = DataLoader(train_ds, batch_size=20,shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_ds, batch_size=20,shuffle=True, num_workers=4)

In [9]:
def train_time(net, train_dataloader):
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    net.train()
    
    for epoch in range(3):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(train_dataloader, 0):
            # get the inputs
            inputs, labels = data['X'], data['classes']
            # wrap them in Variable
            inputs, labels = Variable(inputs.float()), Variable(labels)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.data[0]
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')
    return net

In [10]:
def test_time(net, test_dataloader):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    criterion = nn.NLLLoss()
    for batch_idx, data in enumerate(test_dataloader):
        inputs, labels = data['X'], data['classes']
        inputs, targets = Variable(inputs.float()), Variable(labels)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        test_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
    print "test set Accuracy", correct/float(total)

In [11]:
tnet = Net()
tnet = train_time(tnet,train_dataloader)


Finished Training


In [12]:
test_time(tnet,test_dataloader)

test set Accuracy 0.868852459016


In [13]:
class BinaryLayer(nn.Linear):
    
    def forward(self, input_weights):
        out = super(BinaryLayer, self).forward(input_weights)
        return out

In [14]:
class BinaryNet(nn.Module):
    def __init__(self):
        super(BinaryNet, self).__init__()
        self.fc1 = BinaryLayer(9, 50)
        self.fc2 = BinaryLayer(50, 2)
        self.scores = nn.Softmax()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.scores(x)
        return x

In [15]:
binet = BinaryNet()
binet = train_time(binet,train_dataloader)

Finished Training


In [16]:
test_time(binet,test_dataloader)

test set Accuracy 0.890710382514
