In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # dirname is the directory currently being visited; join it with each file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the CSV into a DataFrame named `data`.
data=pd.read_csv("/kaggle/input/breast-cancer-wisconsin-data/data.csv")
# Bare expression: the notebook renders the frame as this cell's output.
data

In [None]:
# Number of rows for each distinct value of the `diagnosis` column.
data[['diagnosis']].groupby('diagnosis').size()

In [None]:
# Per-column non-null counts for the rows whose diagnosis equals the string 'M'.
# This filter relies on `diagnosis` still holding strings, i.e. it must run before
# the LabelEncoder cell overwrites that column.
data[data['diagnosis'] == 'M'].groupby(data['diagnosis']).count()


In [None]:
# Per-column non-null counts for the rows whose diagnosis is anything other than 'M'.
# Like the cell above, this compares against the raw string and so must run before
# the LabelEncoder cell overwrites `diagnosis`.
data[data['diagnosis'] != 'M'].groupby(data['diagnosis']).count()

In [None]:
# Displays a copy of `data` without the 'Unnamed: 32' column. The result is not
# assigned, so `data` itself is unchanged by this cell.
data.drop(columns=['Unnamed: 32'])

In [None]:
# Displays a copy of `data` without the 'id' column. The result is not assigned,
# so `data` itself is unchanged by this cell.
data.drop(columns=['id'])

In [None]:
from sklearn import preprocessing
# Replace the `diagnosis` column of `data` with the integer codes produced by
# LabelEncoder (the column is overwritten in place).
# NOTE(review): LabelEncoder numbers classes in sorted order, so presumably
# 'B' -> 0 and 'M' -> 1 — confirm with le.classes_.
le = preprocessing.LabelEncoder()
data['diagnosis'] = le.fit_transform(data['diagnosis'])

In [None]:
# Show the first rows of `data` as the cell output.
data.head()

In [None]:
# Modelling frame: `data` minus the 'Unnamed: 32' and 'id' columns. `data` itself
# is left untouched; everything downstream reads `data_source`.
data_source=data.drop(columns=['Unnamed: 32','id'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. A fixed random_state makes the split
# identical on every kernel restart; without it each run trains and evaluates on
# different rows, so results cannot be compared between runs.
trainingDataSet, evaluationDataSet = train_test_split(
    data_source, test_size = 0.2, random_state = 42
)
# Report the (rows, columns) shape of both splits.
print(
    " Training data set : ",
     trainingDataSet.shape,
     "\n",
    "Evalutation data set : ",
    evaluationDataSet.shape
)

In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
import torch.utils.data as data_utils

In [None]:
# Training tensors: every column except `diagnosis` forms the feature tensor and
# `diagnosis` forms the target tensor. torch.Tensor(...) builds tensors of the
# default floating dtype.
# NOTE(review): the features are passed through unscaled — confirm whether
# standardisation was intended before training.
train = data_utils.TensorDataset(torch.Tensor(np.array(trainingDataSet.drop(columns=['diagnosis']))), torch.Tensor(np.array(trainingDataSet['diagnosis'])))
# Mini-batches of up to 150 rows, reshuffled on every pass over the loader.
train_loader = data_utils.DataLoader(train, batch_size = 150, shuffle = True)

In [None]:
# Evaluation tensors, built the same way as the training ones: all columns except
# `diagnosis` are features, `diagnosis` is the target.
test = data_utils.TensorDataset(torch.Tensor(np.array(evaluationDataSet.drop(columns=['diagnosis']))), torch.Tensor(np.array(evaluationDataSet['diagnosis'])))
# Batches of up to 150 rows; shuffle=True only changes the order in which the
# evaluation rows are visited.
test_loader = data_utils.DataLoader(test, batch_size = 150, shuffle = True)

In [None]:
# Debug cell: iterate over the training loader once. No model is called here;
# after the loop `out` is the feature tensor of the last batch, which is printed.
for batch in train_loader:
    images, labels = batch 
    out = images                  # raw feature batch (not a prediction)
print(out)

In [None]:
class CancerClassificationBase(nn.Module):
    """Shared training/validation plumbing for binary classifiers.

    Subclasses implement ``forward`` and must return one probability per row
    (shape ``(N, 1)``, values in [0, 1]) because the loss used here is
    ``F.binary_cross_entropy``.
    """

    def training_step(self, batch):
        """Return the binary cross-entropy loss of one ``(features, labels)`` batch.

        The accuracy the previous version computed here was never used, so it is
        no longer calculated.
        """
        features, labels = batch
        out = self(features)                        # predicted probabilities
        labels = labels.view(len(labels), 1)        # match the (N, 1) output shape
        return F.binary_cross_entropy(out, labels)

    def validation_step(self, batch):
        """Score one batch without touching the optimiser.

        Returns a dict with the detached loss (``val_loss``), the value returned
        by ``accuracy`` (``val_acc``) and the number of rows in the batch
        (``batch_size``), which ``validation_epoch_end`` uses as a weight.
        """
        features, labels = batch
        out = self(features)                        # predicted probabilities
        labels = labels.view(len(labels), 1)
        loss = F.binary_cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc, 'batch_size': len(labels)}

    def validation_epoch_end(self, outputs):
        """Combine per-batch results into one ``{'val_loss', 'val_acc'}`` dict of floats.

        When every entry carries ``batch_size`` the batch means are weighted by
        it, so a short final batch does not count as much as a full one. Entries
        without ``batch_size`` fall back to the plain mean of the batch means.
        """
        batch_losses = torch.stack([x['val_loss'] for x in outputs])
        batch_accs = torch.stack([x['val_acc'] for x in outputs])
        sizes = [x.get('batch_size') for x in outputs]
        if all(size is not None for size in sizes):
            weights = torch.tensor(sizes, dtype=torch.float32)
            total = float(sum(sizes))
            # .to(...) matches dtype and device of each stacked tensor separately.
            epoch_loss = (batch_losses * weights.to(batch_losses)).sum() / total
            epoch_acc = (batch_accs * weights.to(batch_accs)).sum() / total
        else:
            epoch_loss = batch_losses.mean()
            epoch_acc = batch_accs.mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the validation loss and accuracy recorded for ``epoch``."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

In [None]:
def accuracy(outputs, labels):
    """Fraction of rows whose predicted class equals the label, as a 0-dim tensor.

    outputs: either one probability per row (shape ``(N,)`` or ``(N, 1)``), which
        is thresholded at 0.5, or one score per class (shape ``(N, C)``, C > 1),
        where the highest-scoring column is the prediction.
    labels: class indices of shape ``(N,)`` or ``(N, 1)``.

    The previous version always took ``torch.max(outputs, dim=1)``. On a
    single-column output that yields index 0 for every row, and comparing the
    ``(N,)`` predictions with ``(N, 1)`` labels broadcast to an ``(N, N)`` matrix,
    so the returned number was not an accuracy at all.
    """
    if outputs.dim() > 1 and outputs.size(1) > 1:
        preds = torch.argmax(outputs, dim=1)
    else:
        preds = (outputs.reshape(-1) >= 0.5).to(labels.dtype)
    labels = labels.reshape(-1)   # avoid (N,) vs (N, 1) broadcasting
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))
def evaluate(model, val_loader):
    """Run ``model.validation_step`` on every batch and aggregate the results.

    model: object exposing ``validation_step(batch)`` and
        ``validation_epoch_end(list_of_step_results)``.
    val_loader: iterable of batches.

    Returns whatever ``model.validation_epoch_end`` returns. The steps run under
    ``torch.no_grad()`` because evaluation never back-propagates, so no autograd
    graph needs to be built.
    """
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

In [None]:
# Plain vanilla SGD
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and return the per-epoch validation results.

    The optimiser is built as ``opt_func(model.parameters(), lr)``. After every
    pass over ``train_loader`` the model is scored on ``val_loader`` with
    ``evaluate``, the result is reported through ``model.epoch_end`` and appended
    to the returned list.
    """
    optim = opt_func(model.parameters(), lr)

    def run_training_pass():
        # One sweep over the training batches: backprop, update, then clear grads.
        for minibatch in train_loader:
            model.training_step(minibatch).backward()
            optim.step()
            optim.zero_grad()

    epoch_metrics = []
    for epoch_idx in range(epochs):
        run_training_pass()
        metrics = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, metrics)
        epoch_metrics.append(metrics)
    return epoch_metrics

In [None]:
def plot_losses(train_loss,test_loss):
    """Plot the 'val_loss' recorded per epoch for two histories on one chart.

    Note the argument order: the training history comes first, the test history
    second. Each history is a list of dicts containing a 'val_loss' entry.
    """
    test_curve, train_curve = (
        [entry['val_loss'] for entry in records]
        for records in (test_loss, train_loss)
    )
    captions = ["Test Loss", "Training Loss"]
    plt.plot(test_curve, '-x', label='Test Loss')
    plt.plot(train_curve, '--', label='Training Loss')
    plt.legend(captions, loc="lower right")
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Loss vs. No. of epochs')


In [None]:
def plot_accuracies(test_accuracy,train_accuracy):
    """Plot the 'val_acc' recorded per epoch for two histories on one chart.

    The test history is the first argument and the training history the second.
    Each history is a list of dicts containing a 'val_acc' entry.
    """
    test_curve, train_curve = (
        [entry['val_acc'] for entry in records]
        for records in (test_accuracy, train_accuracy)
    )
    captions = ["Test Accuracy", "Training Accuracy"]
    plt.plot(test_curve, '-x', label='Test Accuracy')
    plt.plot(train_curve, '--', label='Training Accuracy')
    plt.legend(captions, loc="lower right")
    plt.xlabel('epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. No. of epochs')

In [None]:
#Actual Model
class BreastCancerIndentification(CancerClassificationBase):
    """Fully connected binary classifier: 30 inputs -> 60 -> 30 -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        # Layer attribute names are part of the state_dict keys; keep them stable.
        self.linear1 = nn.Linear(30, 60)
        self.linear2 = nn.Linear(60, 30)
        self.linear3 = nn.Linear(30, 1)

    def forward(self, xb):
        """Map a (batch, 30) feature tensor to a (batch, 1) tensor of probabilities."""
        activations = xb
        # Two ReLU hidden layers ...
        for hidden_layer in (self.linear1, self.linear2):
            activations = F.relu(hidden_layer(activations))
        # ... followed by a single sigmoid unit.
        return F.sigmoid(self.linear3(activations))

In [None]:
# Instance that is trained below and validated against the test loader.
model = BreastCancerIndentification()

In [None]:
# Second, separate instance; below it is trained and validated on the training loader.
model1=BreastCancerIndentification()

In [None]:
# Loss/accuracy of the still-untrained `model` on the test loader (baseline).
evaluate(model, test_loader)

In [None]:
# Seed both histories with the pre-training evaluation so index 0 of each curve is
# the untrained baseline: `model` on the test loader, `model1` on the training loader.
history = [evaluate(model, test_loader)]
history_train=[evaluate(model1, train_loader)]

In [None]:
# Train for 100 epochs at lr 0.1 and append the per-epoch results to the histories.
# `model` is validated on the test loader; `model1` is validated on the training loader.
# NOTE(review): `history_train` therefore describes a different network instance
# than `history` — confirm that comparing the two curves is intended.
history += fit(100, 1e-1, model, train_loader, test_loader)
history_train += fit(100, 1e-1, model1, train_loader, train_loader)
#history += fit(10, 1e-2, model, train_loader, test_loader)
#history_train += fit(10, 1e-2, model1, train_loader, train_loader)
#history += fit(10, 1e-3, model, train_loader, test_loader)
#history_train += fit(10, 1e-3, model1, train_loader, train_loader)


In [None]:
# Loss/accuracy of the trained `model` on the test loader.
evaluate(model, test_loader)

In [None]:
# Loss/accuracy of the trained `model1` on the test loader.
evaluate(model1, test_loader)

In [None]:
# plot_losses is declared as (train_loss, test_loss). `history` was recorded on the
# test loader and `history_train` on the training loader, so passing them
# positionally in that order swapped the two legend labels. Pass by keyword.
plot_losses(train_loss=history_train, test_loss=history)

In [None]:
# plot_accuracies takes (test_accuracy, train_accuracy): `history` is the test-loader
# history and `history_train` the training-loader history, so this order is correct.
plot_accuracies(history,history_train)

In [None]:
from sklearn.metrics import classification_report

# Gather hard 0/1 predictions and the true labels for the whole test loader, then
# print per-class precision / recall / F1.
predicted_batches = []
actual_batches = []
with torch.no_grad():  # inference only, no autograd graph needed
    for batch in test_loader:
        img, label = batch
        outputs2 = model(img)
        # The model emits a single sigmoid probability per row, so the class is a
        # 0.5 threshold. torch.max(outputs2, 1) over that one column always returns
        # index 0, which reported every row as class 0.
        pred = (outputs2.reshape(-1) >= 0.5).long()
        predicted_batches.append(pred.cpu().numpy())
        actual_batches.append(label.reshape(-1).long().cpu().numpy())

# Errors are no longer swallowed by a bare `except`, so a failing batch is visible
# instead of silently missing from the report.
# Y_Probability holds predicted class labels (not probabilities); the variable
# names are kept so anything reading them afterwards keeps working.
Y_Probability = np.concatenate(predicted_batches)
Y_Actual = np.concatenate(actual_batches)
print(classification_report(Y_Actual, Y_Probability))

In [None]:
#SGD with momentum
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` with a momentum optimiser and return per-epoch validation results.

    The optimiser is built as ``opt_func(model.parameters(), lr, momentum=0.9)``,
    so ``opt_func`` must accept a ``momentum`` keyword. After each pass over
    ``train_loader`` the model is scored on ``val_loader`` via ``evaluate``; the
    result is reported with ``model.epoch_end`` and collected in the returned list.
    """
    stepper = opt_func(model.parameters(), lr, momentum=0.9)
    collected = []
    for epoch_no in range(epochs):
        # Training phase: one update per mini-batch, gradients cleared after each step.
        for minibatch in train_loader:
            batch_loss = model.training_step(minibatch)
            batch_loss.backward()
            stepper.step()
            stepper.zero_grad()
        # Validation phase.
        summary = evaluate(model, val_loader)
        model.epoch_end(epoch_no, summary)
        collected.append(summary)
    return collected

In [None]:
# Start over with two fresh instances so this run is independent of the earlier one.
# Which `fit` and which `BreastCancerIndentification` are used depends on the most
# recently executed definitions; run top to bottom, that is the momentum `fit`
# defined in the cell above.
model = BreastCancerIndentification()
model1=BreastCancerIndentification()

# Index 0 of each history is the untrained baseline.
history = [evaluate(model, test_loader)]
history_train=[evaluate(model1, train_loader)]
# 50 epochs at lr 0.1: `model` validated on the test loader, `model1` on the training loader.
history += fit(50, 1e-1, model, train_loader, test_loader)
history_train += fit(50, 1e-1, model1, train_loader, train_loader)

In [None]:
from sklearn.metrics import classification_report

# Gather hard 0/1 predictions and the true labels for the whole test loader, then
# print per-class precision / recall / F1.
predicted_batches = []
actual_batches = []
with torch.no_grad():  # inference only, no autograd graph needed
    for batch in test_loader:
        img, label = batch
        outputs2 = model(img)
        # The model emits a single sigmoid probability per row, so the class is a
        # 0.5 threshold. torch.max(outputs2, 1) over that one column always returns
        # index 0, which reported every row as class 0.
        pred = (outputs2.reshape(-1) >= 0.5).long()
        predicted_batches.append(pred.cpu().numpy())
        actual_batches.append(label.reshape(-1).long().cpu().numpy())

# Errors are no longer swallowed by a bare `except`, so a failing batch is visible
# instead of silently missing from the report.
# Y_Probability holds predicted class labels (not probabilities); the variable
# names are kept so anything reading them afterwards keeps working.
Y_Probability = np.concatenate(predicted_batches)
Y_Actual = np.concatenate(actual_batches)
print(classification_report(Y_Actual, Y_Probability))

In [None]:
# Vanilla Adam
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.Adam):
    """Train ``model`` (Adam by default) and return the per-epoch validation results.

    The optimiser is built as ``opt_func(model.parameters(), lr)``. Each epoch is
    one pass over ``train_loader`` followed by ``evaluate(model, val_loader)``;
    the evaluation result is passed to ``model.epoch_end`` and stored in the list
    that is returned.
    """
    adam_like = opt_func(model.parameters(), lr)

    def train_once():
        # Backprop and update for every mini-batch, clearing gradients after each step.
        for chunk in train_loader:
            chunk_loss = model.training_step(chunk)
            chunk_loss.backward()
            adam_like.step()
            adam_like.zero_grad()

    def validate(epoch_number):
        # Score on the validation loader and let the model report it.
        outcome = evaluate(model, val_loader)
        model.epoch_end(epoch_number, outcome)
        return outcome

    results = []
    for epoch_number in range(epochs):
        train_once()
        results.append(validate(epoch_number))
    return results

In [None]:
# Fresh instances for the Adam run. Run top to bottom, `fit` is now the version
# defined in the cell above, whose default optimiser is torch.optim.Adam.
model = BreastCancerIndentification()
model1=BreastCancerIndentification()

# Index 0 of each history is the untrained baseline.
history = [evaluate(model, test_loader)]
history_train=[evaluate(model1, train_loader)]
# 50 epochs at lr 0.1: `model` validated on the test loader, `model1` on the training loader.
# NOTE(review): 0.1 is a large step size for Adam — confirm it is intentional.
history += fit(50, 1e-1, model, train_loader, test_loader)
history_train += fit(50, 1e-1, model1, train_loader, train_loader)

In [None]:
from sklearn.metrics import classification_report

# Gather hard 0/1 predictions and the true labels for the whole test loader, then
# print per-class precision / recall / F1.
predicted_batches = []
actual_batches = []
with torch.no_grad():  # inference only, no autograd graph needed
    for batch in test_loader:
        img, label = batch
        outputs2 = model(img)
        # The model emits a single sigmoid probability per row, so the class is a
        # 0.5 threshold. torch.max(outputs2, 1) over that one column always returns
        # index 0, which reported every row as class 0.
        pred = (outputs2.reshape(-1) >= 0.5).long()
        predicted_batches.append(pred.cpu().numpy())
        actual_batches.append(label.reshape(-1).long().cpu().numpy())

# Errors are no longer swallowed by a bare `except`, so a failing batch is visible
# instead of silently missing from the report.
# Y_Probability holds predicted class labels (not probabilities); the variable
# names are kept so anything reading them afterwards keeps working.
Y_Probability = np.concatenate(predicted_batches)
Y_Actual = np.concatenate(actual_batches)
print(classification_report(Y_Actual, Y_Probability))

In [None]:
#RNN Model
class BreastCancerIndentification(CancerClassificationBase):
    """Single-layer RNN binary classifier: 30 features -> RNN(60) -> 1 sigmoid output.

    Each row is treated as a sequence of length 1. This definition reuses the
    name of the fully connected model defined earlier in the notebook, so once
    this cell has run the earlier class is no longer reachable by that name.
    """

    def __init__(self):
        super().__init__()
        self.hidden_dim = 60
        self.n_layers = 1
        self.rnn = nn.RNN(30, self.hidden_dim, self.n_layers, batch_first=True)
        # Fully connected read-out on top of the RNN output.
        self.fc = nn.Linear(self.hidden_dim, 1)

    def forward(self, xb):
        """Map a (batch, 30) feature tensor to a (batch, 1) tensor of probabilities."""
        batch_size = len(xb)
        hidden = self.init_hidden(batch_size=batch_size)
        # Add the sequence axis without touching the caller's tensor. The previous
        # in-place xb.resize_(...) changed the shape of the batch object itself.
        seq = xb.reshape(batch_size, 1, -1)
        out, hidden = self.rnn(seq, hidden)
        # Flatten (batch, seq, hidden) to (batch * seq, hidden) for the linear layer.
        out = out.contiguous().view(-1, self.hidden_dim)
        return F.sigmoid(self.fc(out))

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state of shape (n_layers, batch_size, hidden_dim).

        The tensor is created with the dtype and device of the model's own
        weights so it can be fed to ``self.rnn`` wherever the module lives.
        """
        reference = self.fc.weight
        return torch.zeros(
            self.n_layers, batch_size, self.hidden_dim,
            dtype=reference.dtype, device=reference.device,
        )
        

In [None]:
# Plain vanilla SGD
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and return the per-epoch validation results.

    Builds the optimiser as ``opt_func(model.parameters(), lr)``, then alternates
    one pass over ``train_loader`` with one ``evaluate(model, val_loader)`` call.
    Every evaluation result is handed to ``model.epoch_end`` and kept in the
    returned list. This redefinition replaces any earlier ``fit`` in the notebook.
    """
    sgd = opt_func(model.parameters(), lr)
    log = []
    for current_epoch in range(epochs):
        # --- training ---
        for sample_batch in train_loader:
            step_loss = model.training_step(sample_batch)
            step_loss.backward()
            sgd.step()
            sgd.zero_grad()
        # --- validation ---
        scored = evaluate(model, val_loader)
        model.epoch_end(current_epoch, scored)
        log.append(scored)
    return log

In [None]:
# Fresh instances for the RNN run. Run top to bottom, BreastCancerIndentification
# now refers to the RNN class defined above.
model = BreastCancerIndentification()
model1=BreastCancerIndentification()

In [None]:
# Untrained baseline on the test loader, followed by 50 epochs at lr 0.01 with
# validation on the test loader after each epoch.
history = [evaluate(model, test_loader)]
history += fit(50, 1e-2, model, train_loader, test_loader)

In [None]:
# Same schedule for the separate `model1` instance, but validated on the training loader.
history_train=[evaluate(model1, train_loader)]
history_train += fit(50, 1e-2, model1, train_loader, train_loader)

In [None]:
# plot_losses is declared as (train_loss, test_loss). `history` was recorded on the
# test loader and `history_train` on the training loader, so passing them
# positionally in that order swapped the two legend labels. Pass by keyword.
plot_losses(train_loss=history_train, test_loss=history)

In [None]:
# plot_accuracies takes (test_accuracy, train_accuracy): `history` is the test-loader
# history and `history_train` the training-loader history, so this order is correct.
plot_accuracies(history,history_train)

In [None]:
from sklearn.metrics import classification_report

# Gather hard 0/1 predictions and the true labels for the whole test loader, then
# print per-class precision / recall / F1.
predicted_batches = []
actual_batches = []
with torch.no_grad():  # inference only, no autograd graph needed
    for batch in test_loader:
        img, label = batch
        outputs2 = model(img)
        # The model emits a single sigmoid probability per row, so the class is a
        # 0.5 threshold. torch.max(outputs2, 1) over that one column always returns
        # index 0, which reported every row as class 0.
        pred = (outputs2.reshape(-1) >= 0.5).long()
        predicted_batches.append(pred.cpu().numpy())
        actual_batches.append(label.reshape(-1).long().cpu().numpy())

# Errors are no longer swallowed by a bare `except`, so a failing batch is visible
# instead of silently missing from the report.
# Y_Probability holds predicted class labels (not probabilities); the variable
# names are kept so anything reading them afterwards keeps working.
Y_Probability = np.concatenate(predicted_batches)
Y_Actual = np.concatenate(actual_batches)
print(classification_report(Y_Actual, Y_Probability))