<a href="https://colab.research.google.com/github/mahir1995/Malaria-Detection/blob/master/Malaria_Detection_using_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Malaria Detection

## Download data

In [0]:
from google.colab import files
uploaded = files.upload()

!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [0]:
!kaggle datasets download -d iarunava/cell-images-for-detecting-malaria

Downloading cell-images-for-detecting-malaria.zip to /content
 95% 321M/337M [00:05<00:00, 44.3MB/s]
100% 337M/337M [00:05<00:00, 60.1MB/s]


In [0]:
!unzip cell-images-for-detecting-malaria.zip

## Import dependencies

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from pathlib import Path

## Load dataset

In [0]:
# Define transforms for training, testing and validation set
train_transforms = transforms.Compose([transforms.RandomRotation(30), 
                                      transforms.RandomResizedCrop(224), 
                                      transforms.RandomVerticalFlip(), 
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256), 
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256), 
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

In [0]:
path = Path('cell_images/')

In [0]:
!ls {path}

Parasitized  Uninfected


In [0]:
train_data = datasets.ImageFolder(path, transform=train_transforms)

In [0]:
train_data.classes

['Parasitized', 'Uninfected']

In [0]:
# Convert data to normalized float tensor
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize([0.5, 0.5, 0.5],
                                                   [0.5, 0.5, 0.5])])

In [0]:
num_trains = len(train_data)
indices = list(range(num_trains))
np.random.shuffle(indices)

In [0]:
valid_split = int(np.floor(0.2 * num_trains))
test_split = int(np.floor((0.1 + 0.2) * num_trains))
valid_split, test_split

(5511, 8267)

In [0]:
valid_idx, test_idx, train_idx = indices[:valid_split], indices[valid_split:test_split], indices[test_split:]
len(valid_idx), len(test_idx), len(train_idx)

(5511, 2756, 19291)

In [0]:
train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

In [0]:
train_dl = DataLoader(train_data, batch_size=64, sampler=train_sampler)
valid_dl = DataLoader(train_data, batch_size=32, sampler=valid_sampler)
test_dl = DataLoader(train_data, batch_size=20, sampler=test_sampler)

## Train  model

In [0]:
model = models.resnet34(pretrained=True)

In [0]:
for param in model.parameters():
    param.requires_grad = False

In [0]:
model.fc = nn.Linear(512, 2, bias=False)

In [0]:
fc_parameters = model.fc.parameters()
for param in fc_parameters:
    param.requires_grad = True

In [0]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [0]:
use_cuda = torch.cuda.is_available()
if use_cuda:
    model = model.cuda()

In [0]:
criterion = nn.CrossEntropyLoss()
opt = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

In [0]:
def train(epochs, model, optimizer, criterion, use_cuda=True):
    
    valid_loss_min = np.Inf
    
    for epoch in range(1, epochs+1):
        
        train_loss = 0.0
        valid_loss = 0.0
        
        model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
                
            optimizer.zero_grad()
            
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            
            optimizer.step()
            
            train_loss = train_loss + ((1 / (batch_idx + 1)) * loss.data - train_loss)
            
            if batch_idx % 100 == 0:
                print ('Epoch:', epoch, 'Batch:', batch_idx, 'loss:', train_loss)
                
        model.eval()
        for batch_idx, (data, target) in enumerate(valid_dl):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
                
            output = model(data)
            loss = criterion(output, target)
            
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        print('Epoch:', epoch, 'Training loss:', train_loss, 'Valid loss:', valid_loss)
        
    return model

In [0]:
model = train(3, model, opt, criterion, use_cuda)

Epoch: 1 Batch: 0 loss: tensor(0.7783, device='cuda:0')
Epoch: 1 Batch: 100 loss: tensor(0.0044, device='cuda:0')
Epoch: 1 Batch: 200 loss: tensor(0.0019, device='cuda:0')
Epoch: 1 Batch: 300 loss: tensor(0.0013, device='cuda:0')
Epoch: 1 Training loss: tensor(0.0012, device='cuda:0') Valid loss: tensor(0.3732, device='cuda:0')
Epoch: 2 Batch: 0 loss: tensor(0.4941, device='cuda:0')
Epoch: 2 Batch: 100 loss: tensor(0.0034, device='cuda:0')
Epoch: 2 Batch: 200 loss: tensor(0.0015, device='cuda:0')
Epoch: 2 Batch: 300 loss: tensor(0.0015, device='cuda:0')
Epoch: 2 Training loss: tensor(0.0016, device='cuda:0') Valid loss: tensor(0.3429, device='cuda:0')
Epoch: 3 Batch: 0 loss: tensor(0.4222, device='cuda:0')
Epoch: 3 Batch: 100 loss: tensor(0.0026, device='cuda:0')
Epoch: 3 Batch: 200 loss: tensor(0.0018, device='cuda:0')
Epoch: 3 Batch: 300 loss: tensor(0.0011, device='cuda:0')
Epoch: 3 Training loss: tensor(0.0016, device='cuda:0') Valid loss: tensor(0.3576, device='cuda:0')


## Testing

In [0]:
def test(model, criterion, use_cuda):
    
    test_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (data, target) in enumerate(test_dl):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
            
        output = model(data)
        loss = criterion(output, target)
        
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        
        pred = output.data.max(1, keepdim=True)[1]
        
        correct = np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
        
    print('Test Loss:', test_loss)
    print('Test Acuracy:', 100 * correct / total)
    
test(model, criterion, use_cuda)

Test Loss: tensor(0.3383, device='cuda:0')
Test Acuracy: 0.5079825834542816
