In [None]:

# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Print the current working directory.
!pwd

In [None]:
# Change the working directory to the "colab" folder under "My Drive" on the mounted drive.
%cd /content/drive/My\ Drive/colab

In [None]:
# List the contents of cell_images/, the folder used as the ImageFolder root further down.
!ls cell_images/

In [None]:
# Print the working directory again, now after the %cd above.
!pwd


In [None]:
# Re-import edited modules automatically before each cell runs (autoreload mode 2)
# and render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

import torch
from torch import nn, optim
from torchvision import transforms, datasets, models
from torch.utils.data.sampler import SubsetRandomSampler

In [None]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device


In [None]:
# Per-channel mean / std used by Normalize once the image has become a tensor.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Pipeline applied to every image read from disk: random resized crop to 64x64,
# conversion to a tensor, then per-channel normalisation.
# NOTE(review): the one dataset built from this pipeline feeds the train, valid and
# test loaders alike, so validation/test inputs are randomly cropped as well.
data_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]
)


In [None]:
# Root folder handed to ImageFolder; every image is passed through data_transforms
# when it is loaded.
data_dir = 'cell_images/'
data = datasets.ImageFolder(root=data_dir, transform=data_transforms)


In [None]:
# Fractions of the full dataset held out for validation and for testing;
# whatever remains is used for training.
valid_pct, test_pct = 0.2, 0.1

In [None]:
# Shuffle the sample indices once with a fixed seed and carve them into three
# disjoint index sets. The seed keeps the partition identical across kernel
# restarts, so a checkpoint reloaded from disk is never evaluated on images
# that belonged to its training split in an earlier session.
SPLIT_SEED = 42

data_len = len(data)
data_indices = list(range(data_len))
np.random.RandomState(SPLIT_SEED).shuffle(data_indices)

# Boundaries inside the shuffled list: [0, valid_split) -> validation,
# [valid_split, test_split) -> test, [test_split, data_len) -> training.
valid_split = int(np.floor(valid_pct * data_len))
test_split = int(np.floor((valid_pct + test_pct) * data_len))

valid_idx = data_indices[:valid_split]
test_idx = data_indices[valid_split:test_split]
train_idx = data_indices[test_split:]

# The three subsets must partition the dataset exactly.
assert len(valid_idx) + len(test_idx) + len(train_idx) == data_len

# Each sampler draws (in random order) only from its own index subset.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

# All three loaders read from the same dataset object; only the sampler differs.
dataloaders = {
    'train': torch.utils.data.DataLoader(data, batch_size=64, sampler=train_sampler),
    'valid': torch.utils.data.DataLoader(data, batch_size=32, sampler=valid_sampler),
    'test': torch.utils.data.DataLoader(data, batch_size=20, sampler=test_sampler)
}

print(len(valid_idx), len(test_idx), len(train_idx))

In [None]:
# Undo the input normalisation in two steps: first multiply each channel by its
# std (Normalize divides by the given 1/std), then add the channel mean back.
_undo_std = transforms.Normalize([0., 0., 0.], [1/0.229, 1/0.224, 1/0.225])
_undo_mean = transforms.Normalize([-0.485, -0.456, -0.406], [1., 1., 1.])
invTrans = transforms.Compose([_undo_std, _undo_mean])


def imshow(img):
    """Draw one normalised image tensor on the current matplotlib axes."""
    restored = invTrans(img)
    # Move axis 0 (channels) to the end, the layout plt.imshow expects.
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)

In [None]:
# Display names indexed by the integer label produced by the loader.
# NOTE(review): assumes ImageFolder maps 0 -> infected and 1 -> uninfected;
# confirm against data.class_to_idx.
classes = ['infected', 'uninfected']

# Pull a single batch from the training loader. The builtin next() is used because
# DataLoader iterators no longer expose a .next() method in recent PyTorch releases.
dataiter = iter(dataloaders['train'])
images, labels = next(dataiter)

fig = plt.figure(figsize=(6, 6))
# Display the first 9 images of the batch in a 3x3 grid, each titled with its class.
for idx in range(9):
    ax = fig.add_subplot(3, 3, idx + 1, xticks=[], yticks=[])
    imshow(images[idx])
    ax.set_title(classes[labels[idx].item()])
plt.savefig('data_vis.png')


In [None]:
class Net(nn.Module):
    """Small CNN mapping 3-channel 64x64 images to two class scores.

    Five 3x3 convolutions (padding 1) with ReLU activations and three 2x2
    max-pools form the feature extractor; its flattened output feeds a
    three-layer fully connected classifier with dropout.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. Each MaxPool2d(2) halves the spatial
        # size, so a 64x64 input leaves this stack as a 64-channel 8x8 map.
        conv_stack = [
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.layer1 = nn.Sequential(*conv_stack)

        # Classifier head; the first Linear expects the flattened 8*8*64 features.
        head = [
            nn.Linear(8 * 8 * 64, 500),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(500, 100),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(100, 2),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Return the raw class scores, one row of two values per input image."""
        features = self.layer1(x)
        # Collapse everything except the batch dimension before the linear layers.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


# Instantiate the network and move its parameters to the selected device;
# the trailing expression shows the module structure as the cell output.
model_scratch = Net().to(device)
model_scratch

In [None]:
# Cross-entropy loss computed on the network's two output scores.
criterion_scratch = nn.CrossEntropyLoss()

# Adam over every parameter of the scratch model.
# Alternative kept for reference:
# optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.001, momentum=0.9)
optimizer_scratch = optim.Adam(params=model_scratch.parameters(), lr=1e-3)


In [None]:
def train(n_epochs, loaders, model, optimizer, criterion, device, save_path):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Args:
        n_epochs: number of full passes over ``loaders['train']``.
        loaders: mapping with ``'train'`` and ``'valid'`` iterables that yield
            ``(inputs, targets)`` batches.
        model: network to optimise; it is updated in place.
        optimizer: optimiser that steps ``model``'s parameters.
        criterion: callable ``criterion(output, target)`` returning a scalar loss.
        device: device every batch is moved to before the forward pass.
        save_path: destination for ``model.state_dict()`` whenever the validation
            loss improves. When it is a filesystem path, its parent directory is
            created if missing.

    Returns:
        ``(model, train_losses, val_losses)``; each list holds one Python float
        per epoch (the running mean of that epoch's per-batch losses).
    """
    # Make sure the checkpoint directory exists before the first save.
    if isinstance(save_path, (str, os.PathLike)):
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    # Lowest validation loss seen so far; any real loss beats +inf.
    valid_loss_min = float('inf')
    train_losses = []
    val_losses = []

    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Incremental mean of the batch losses. .item() yields a plain float,
            # so the history never holds (possibly GPU-resident) tensors.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for validation; skipping them saves memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                data, target = data.to(device), target.to(device)

                output = model(data)
                loss = criterion(output, target)

                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        train_losses.append(train_loss)
        val_losses.append(valid_loss)

        # Keep only the weights of the best epoch (by validation loss) on disk.
        if valid_loss < valid_loss_min:
            print('Saving model..')
            valid_loss_min = valid_loss
            torch.save(model.state_dict(), save_path)

    return model, train_losses, val_losses


In [None]:
# Train the scratch CNN for 20 epochs; the checkpoint with the lowest validation
# loss is written to models/model_scratch-img64.pt.
model_scratch, train_losses, val_losses = train(
    n_epochs=20,
    loaders=dataloaders,
    model=model_scratch,
    optimizer=optimizer_scratch,
    criterion=criterion_scratch,
    device=device,
    save_path='models/model_scratch-img64.pt',
)

In [None]:
# Loss curves, one point per epoch. float() accepts plain numbers as well as 0-dim
# tensors (including GPU-resident ones, which matplotlib cannot plot directly).
train_curve = [float(v) for v in train_losses]
val_curve = [float(v) for v in val_losses]

plt.plot(range(1, len(train_curve) + 1), train_curve, label='Training loss')
plt.plot(range(1, len(val_curve) + 1), val_curve, label='Validation loss')
plt.xlabel('Epoch')  # the histories hold one value per epoch, not per iteration
plt.ylabel('Loss')
plt.legend()
plt.savefig('Training Validation Loss with CNN from scratch.png')
plt.show()

In [None]:
# Restore the best checkpoint written during training. map_location remaps the saved
# tensors onto the current device, so a checkpoint saved on a GPU also loads on CPU.
model_scratch.load_state_dict(
    torch.load('models/model_scratch-img64.pt', map_location=device)
)

In [None]:
def test(loaders, model, criterion, device):
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    
    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.
    y = None
    y_hat = None
    
    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU or CPU
        data, target = data.to(device), target.to(device)
        
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        
        if y is None:
            y = target.cpu().numpy()
            y_hat = pred.data.cpu().view_as(target).numpy()
        else:
            y = np.append(y, target.cpu().numpy())
            y_hat = np.append(y_hat, pred.data.cpu().view_as(target).numpy())
            
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))
    
    return y, y_hat

In [None]:
# Evaluate the scratch model on the test loader; y receives the true labels and
# y_hat the predicted class indices.
y, y_hat = test(
    loaders=dataloaders,
    model=model_scratch,
    criterion=criterion_scratch,
    device=device,
)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Unpack the 2x2 confusion matrix (rows = true label, columns = predicted label).
# labels=[0, 1] pins the shape to 2x2 even if one class is absent from y / y_hat,
# so the four-way unpacking cannot fail. Label 1 is treated as the positive class.
# NOTE(review): label 1 is 'uninfected' in the classes list used for plotting;
# confirm that is the intended positive class for precision/recall.
tn, fp, fn, tp = confusion_matrix(y, y_hat, labels=[0, 1]).ravel()

In [None]:
# Precision of the positive class: TP / (TP + FP). Falls back to 0.0 when nothing
# was predicted positive instead of dividing by zero.
predicted_pos = tp + fp
precision = tp / predicted_pos if predicted_pos else 0.0
print(precision)

In [None]:
# Recall of the positive class: TP / (TP + FN). Falls back to 0.0 when there are
# no positive samples instead of dividing by zero.
actual_pos = tp + fn
recall = tp / actual_pos if actual_pos else 0.0
print(recall)

In [None]:
# F1: harmonic mean of precision and recall; 0.0 when both are zero so the
# expression never divides by zero.
pr_sum = precision + recall
f1_score = 2 * (precision * recall) / pr_sum if pr_sum else 0.0

In [None]:
# Show the F1 score as the cell output.
f1_score

In [None]:
# Full confusion matrix (rows = true labels, columns = predictions) for plotting.
cm = confusion_matrix(y_true=y, y_pred=y_hat)

In [None]:
import seaborn as sns


In [None]:
# Annotated confusion-matrix heatmap. fmt='d' prints the integer counts in full;
# the default annotation format abbreviates large counts (e.g. 1.3e+03).
c = sns.heatmap(cm, annot=True, fmt='d')
c.set_xlabel('Predicted label')
c.set_ylabel('True label')
fig = c.get_figure()
fig.savefig("output.png")