# PyTorch Implementation With Breast Cancer Dataset

### Loading the libraries

In [None]:
import torch
import numpy as np
import pandas as pd
from torch.autograd import Variable
import torch.nn.functional as F
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

### Loading the Breast Cancer Dataset

In [None]:
# Read the breast-cancer CSV into a DataFrame and preview its first ten rows.
data = pd.read_csv(filepath_or_buffer='../input/breast-cancer/breast-cancer.csv')
data.head(n=10)

# 'M' -> Malignant Tumor
# 'B' -> Benign Tumor

In [None]:
# Feature columns are everything except the first two columns and the last one.
cols = data.columns
feature_cols = cols[2:-1]
x_data = data[feature_cols]
print(len(cols))
print(cols)

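# The dataset has 33 columns in total; after dropping the first two columns and the
# last one (`cols[2:-1]`), 30 feature columns remain for classifying the tumor as 'M' / 'B'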

In [None]:
# Encode the string labels as integer class codes (label encoding, not
# one-hot). LabelEncoder numbers the classes in sorted order; `le.classes_`
# holds the original label for each code.

y_data = data[cols[1]]
le = LabelEncoder()
y_data = np.array(le.fit_transform(y_data))
# A bare expression in the middle of a cell displays nothing, so print it.
print(y_data[:5], le.classes_)

print(y_data.shape, x_data.values.shape)

# Convert the features and labels to tensors. The `Variable` wrapper is
# deprecated: plain tensors already support autograd.

x_data = torch.from_numpy(x_data.values)
y_data = torch.from_numpy(y_data)

In [None]:
# Defining the model to be trained

class Model(torch.nn.Module):
    """Fully connected binary classifier with two hidden layers.

    Every layer, including the output layer, is followed by a sigmoid, so the
    forward pass returns values between 0 and 1 that can be passed straight
    to ``torch.nn.BCELoss``.

    Args:
        input_size: Number of input features per sample.
        hidden1_size: Width of the first hidden layer.
        hidden2_size: Width of the second hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size=30, hidden1_size=16, hidden2_size=4,
                 output_size=1):
        super().__init__()
        self.l1 = torch.nn.Linear(input_size, hidden1_size)
        self.l2 = torch.nn.Linear(hidden1_size, hidden2_size)
        self.l3 = torch.nn.Linear(hidden2_size, output_size)

        # Activation function (not a loss) applied after each linear layer.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid outputs for a batch (or single row) ``x``."""
        hidden1 = self.sigmoid(self.l1(x))
        hidden2 = self.sigmoid(self.l2(hidden1))
        return self.sigmoid(self.l3(hidden2))


model = Model()

In [None]:
# Print the module's string representation to inspect its registered layers.
print(model)

In [None]:
# Binary cross-entropy summed over all samples (reduction='sum'), so the
# printed loss scales with the number of rows.
criterion = torch.nn.BCELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# The inputs and targets never change between epochs, so convert them once
# instead of on every iteration. The targets are reshaped to a column and cast
# to float to match the model output passed to BCELoss.
inputs = x_data.float()
targets = y_data.view(-1, 1).float()

for epoch in range(5000):  # full-batch training: one optimizer step per epoch
    y_pred = model(inputs)
    loss = criterion(y_pred, targets)
    # detach() replaces the legacy `.data` attribute for a gradient-free view.
    print('Epoch', epoch, 'Loss:', loss.item(), '- Pred:', y_pred.detach()[0])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [None]:
# Spot-check a single prediction against its label.
# The row is cast to the model's parameter dtype rather than converting the
# whole model with `model.double()`, which would permanently change the
# trained model in place. Calling `model(...)` (not `.forward`) and disabling
# gradient tracking is the standard inference pattern.

sample_idx = 25
with torch.no_grad():
    sample_input = x_data[sample_idx].to(next(model.parameters()).dtype)
    sample_pred = model(sample_input) > 0.5
sample_pred, y_data[sample_idx]

# Both are True (Malignant Tumor) [the prediction matches the label]

In [None]:
# Spot-check a second prediction against its label.
# The row is cast to the model's parameter dtype rather than converting the
# whole model with `model.double()`, which would permanently change the
# trained model in place. Calling `model(...)` (not `.forward`) and disabling
# gradient tracking is the standard inference pattern.

sample_idx = 55
with torch.no_grad():
    sample_input = x_data[sample_idx].to(next(model.parameters()).dtype)
    sample_pred = model(sample_input) > 0.5
sample_pred, y_data[sample_idx]

# Both are False (Benign Tumor) [the prediction matches the label]

In [None]:
# `pred` must be computed before it can be converted: threshold the model
# output at 0.5 for every row. The inputs are cast to the model's parameter
# dtype so this works whether or not the model has been converted to double.
with torch.no_grad():
    pred = model(x_data.to(next(model.parameters()).dtype)) > 0.5

# Flatten the (N, 1) boolean predictions to 1-D integer labels so they line
# up with the 1-D integer targets.
a = pred.numpy().reshape(-1).astype(int)
b = y_data.numpy()
a.shape, b.shape

In [None]:
# Plot the confusion matrix.
# scikit-learn expects (y_true, y_pred): rows are the actual classes and
# columns the predicted ones. `b` holds the labels and `a` the predictions.

c = confusion_matrix(b, a)
# fmt='d' shows the counts as integers instead of the default '.2g'
# format, which renders larger counts in scientific notation.
ax = sns.heatmap(c, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual');

# The matrix shows zero misclassifications for both classes. Note that this is
# measured on the same data the model was trained on, not on a held-out set.

In [None]:
# Classification report (per-class precision, recall and F1).
# scikit-learn expects (y_true, y_pred); swapping them exchanges precision
# and recall. `b` holds the labels and `a` the predictions.

print(classification_report(b, a, target_names=le.classes_))

# All scores (precision, recall and F1) are 100%. These are computed on the
# training data, so they do not measure performance on unseen samples.