# Blood Cell Classification
Subtypes: Eosinophil, Lymphocyte, Monocyte, and Neutrophil

## Importing Modules

In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import os
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd.variable import Variable
import pandas as pd
import cv2

%matplotlib inline
%pylab inline

## Organize

In [None]:
def pplus(*things):
    """Print all arguments on a single line framed by two dashed rules.

    Non-string arguments are converted with ``str``. Every argument is
    prefixed with one space, so the printed line starts with a space.
    """
    rule = '------------------------------------'
    pieces = [item if isinstance(item, str) else str(item) for item in things]
    sentence = "".join(" " + piece for piece in pieces)
    print(rule)
    print(sentence)
    print(rule)

In [None]:
# Root folder of the image dataset and its TEST / TRAIN sub-folders.
# Every path keeps its trailing slash.
DIR = "../input/dataset2-master/dataset2-master/images/"
TEST = DIR + "TEST/"
TRAIN = DIR + "TRAIN/"

In [None]:
# Training hyper-parameters.
batch_size = 128  # mini-batch size used for the training DataLoader
lr = 0.0002  # learning rate handed to the optimizer

## Inspecting

In [None]:
# Walk the directory layout and compare per-class sample counts.
pplus("Top dir:", os.listdir("../input"))
pplus("dataset2 dir:", os.listdir(DIR))

# Un-augmented test split.
simple_dir = DIR + "TEST_SIMPLE"
pplus("TEST_SIMPLE:", os.listdir(simple_dir))
monocyte_simple = os.listdir(simple_dir + "/MONOCYTE")
pplus('samples in one TS dir:', len(monocyte_simple))
eosinophil_simple = os.listdir(simple_dir + "/EOSINOPHIL")
pplus('samples in another TS dir:', len(eosinophil_simple))
print('Looks like Class imbalance, Plot a histogram to get proper estimate')

# Augmented test split.
monocyte_test = os.listdir(TEST + "MONOCYTE")
pplus(len(monocyte_test))
eosinophil_test = os.listdir(TEST + "EOSINOPHIL")
pplus(len(eosinophil_test))
print('Looks like augmented set doesnot have any imbalance')


In [None]:
# Class name -> integer label, numbered 0..3 in the order listed here.
mappings = {
    name: index
    for index, name in enumerate(
        ['NEUTROPHIL', 'EOSINOPHIL', 'MONOCYTE', 'LYMPHOCYTE'])
}
print(mappings)

In [None]:
# Load the label table that ships with the dataset.
labels = pd.read_csv('../input/dataset2-master/dataset2-master/labels.csv')
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in pplus only printed a framed " None".
labels.info()

In [None]:
# Show the first and the last rows of the label table.
for preview in (labels.head(), labels.tail()):
    pplus(preview)

## Plotting a Sample of each

In [None]:
plt.figure(figsize=(16,16))

def plot_image(location, subplot):
    """Draw one training image into the given subplot slot.

    Args:
        location: Image path relative to ``TRAIN`` (``'CLASS/file.jpeg'``);
            the part before the first ``/`` becomes the subplot title.
        subplot: Matplotlib subplot specifier, e.g. ``221``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    title = location.split('/')[0]
    path = TRAIN + location
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('Could not read image: ' + path)
    plt.subplot(subplot)
    plt.title(title)
    plt.axis('off')
    # OpenCV decodes to BGR while Matplotlib expects RGB; convert so the
    # colours are not swapped in the plot.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return


In [None]:
# One sample image per blood-cell subtype (paths relative to TRAIN).
loc = [
    'EOSINOPHIL/_0_207.jpeg',
    'LYMPHOCYTE/_0_204.jpeg',
    'MONOCYTE/_0_180.jpeg',
    'NEUTROPHIL/_0_292.jpeg',
]

# With the inline backend a figure is rendered and closed when its cell
# finishes, so the figure is created in the same cell that draws into it;
# otherwise the requested size does not apply to these subplots.
plt.figure(figsize=(16, 16))

# 221..224 address the four slots of a 2x2 grid.
for subplot, sample in enumerate(loc, start=221):
    plot_image(sample, subplot)

In [None]:
from tqdm import tqdm
def get_data(folder):
    """Load every image below ``folder`` as a 64x64 array with its label.

    ``folder`` must contain one sub-directory per class, each named after a
    key of ``mappings``. Entries whose name starts with ``.`` are skipped
    entirely (previously only the label lookup was skipped, so their files
    were still loaded with a stale or undefined label).

    Args:
        folder: Directory holding the per-class sub-directories.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays in matching order.

    Raises:
        KeyError: If a sub-directory name is not a key of ``mappings``.
        ValueError: If an image file cannot be decoded by OpenCV.
    """
    images = []
    labels = []
    for subtype in os.listdir(folder):
        if subtype.startswith('.'):
            continue
        label = mappings[subtype]
        class_dir = os.path.join(folder, subtype)
        for img_name in tqdm(os.listdir(class_dir)):
            if img_name.startswith('.'):
                continue
            path = os.path.join(class_dir, img_name)
            image = cv2.imread(path)
            if image is None:
                # cv2.imread returns None instead of raising on failure.
                raise ValueError('Could not read image: ' + path)
            images.append(cv2.resize(image, (64, 64)))
            labels.append(label)
    return np.asarray(images), np.asarray(labels)


In [None]:
# Load the resized images and integer labels of the TRAIN and TEST folders.
train_im, train_labels = get_data(TRAIN)
test_im, test_labels = get_data(TEST)

## Create Custom Dataset Class

In [None]:
class BloodDataset(Dataset):
    """Dataset over parallel ``images`` / ``labels`` sequences.

    When ``transform`` is truthy it is applied to the image on access; the
    label is always returned unchanged.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        # The number of samples is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = self.images[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, self.labels[idx]
        

In [None]:

# Per-channel statistics over the whole training set, divided by 255 so
# they match the [0, 1] range ToTensor yields (assumes uint8 images).
# The earlier un-normalised dataset was overwritten before any use and
# has been dropped.
mean = tuple((train_im.mean(axis=(0, 1, 2)) / 255).round(4))
std = tuple((train_im.std(axis=(0, 1, 2)) / 255).round(4))

tfms = transforms.Compose([transforms.ToTensor(),
                           transforms.Normalize(mean, std)])

train = BloodDataset(train_im, train_labels, transform=tfms)

pplus('Mean Values:', mean)
pplus('Std Dev', std)


In [None]:
# Per-channel statistics of the test images, divided by 255 to match the
# [0, 1] range ToTensor yields (assumes uint8 images).
# Fix: the mean was previously computed with .std() (copy-paste slip).
# NOTE(review): common practice is to normalise the test set with the
# training-set statistics; confirm which is intended here.
mean = tuple((test_im.mean(axis=(0, 1, 2)) / 255).round(4))
std = tuple((test_im.std(axis=(0, 1, 2)) / 255).round(4))

tfms = transforms.Compose([transforms.ToTensor(),
                           transforms.Normalize(mean, std)])
test = BloodDataset(test_im, test_labels, transform=tfms)

pplus('Mean Values:', mean)
pplus('Std Dev', std)

In [None]:
# random_split needs PyTorch 0.4.1 or newer and takes the split sizes as one list:
# n_valid = len(train_labels) // 10
# valid, training = torch.utils.data.random_split(train, [n_valid, len(train_labels) - n_valid])

In [None]:
# Peek at the transformed image of sample 3.
train[3][0]

In [None]:
# Training batches are reshuffled; test batches keep their order.
trainloader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
testloader = DataLoader(dataset=test, batch_size=64, shuffle=False)

## Perfect! All looks good, now let's define our network
- Let's call it a depthwise-separable wide ResNet.

In [None]:


class Network(nn.Module):
    """Classifier: 3x3 conv stem, three ``Block`` stages, global average
    pooling and a linear layer producing 4 scores."""

    def __init__(self):
        super().__init__()
        self.layer0 = self.conv(3, 32)
        self.block1 = Block(32)
        self.block2 = Block(64)
        self.block3 = Block(128)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(256, 4)

    def conv(self, in_, out_):
        """Return a 3x3 stride-1 convolution followed by ReLU and BatchNorm."""
        stem = [
            nn.Conv2d(in_, out_, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_),
        ]
        return nn.Sequential(*stem)

    def forward(self, x):
        """Map a batch of 3-channel images to a ``(batch, 4)`` score tensor."""
        stages = (self.layer0, self.block1, self.block2, self.block3,
                  self.avgpool)
        for stage in stages:
            x = stage(x)
        # Drop the 1x1 spatial dimensions left by the adaptive pooling.
        flat = x.view(-1, 256)
        return self.fc(flat)
# ----------------------------------------------------------------------------------------#
class Block(nn.Module):
    """Residual stage that doubles the channel count and halves the
    spatial size.

    ``forward`` adds a 1x1 projection shortcut to the first separable
    unit, an identity shortcut to the second, and finishes with a
    stride-2 3x3 convolution.
    """

    def __init__(self, in_):
        super().__init__()
        out_ = in_ * 2
        self.layer1 = self.conv_ds(in_, out_)
        self.layer2 = self.conv_ds(out_, out_)
        self.layer2_1 = self.conv_bn(out_, out_)

        self.layer1_1x1 = self.one_by_one(in_, out_)

    def one_by_one(self, in_, out_):
        """Return a 1x1 convolution followed by ReLU and BatchNorm."""
        return nn.Sequential(
            nn.Conv2d(in_, out_, kernel_size=1, stride=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_),
        )

    def conv_bn(self, in_, out_):
        """Return a 3x3 stride-2 convolution followed by ReLU and BatchNorm."""
        return nn.Sequential(
            nn.Conv2d(in_, out_, kernel_size=3, stride=2, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_),
        )

    def conv_ds(self, in_, out_):
        """Return a 1x1 -> per-channel 3x3 -> 1x1 convolution stack.

        The intermediate width is ``round(in_ * 3/2)``; each convolution is
        followed by ReLU and BatchNorm.
        """
        mid = round(in_ * 3/2)
        expand = [
            nn.Conv2d(in_, mid, kernel_size=1, stride=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(mid),
        ]
        per_channel = [
            nn.Conv2d(mid, mid, kernel_size=3, stride=1, padding=1,
                      groups=mid, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(mid),
        ]
        project = [
            nn.Conv2d(mid, out_, kernel_size=1, stride=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(out_),
        ]
        return nn.Sequential(*expand, *per_channel, *project)

    def forward(self, x):
        main = self.layer1(x)
        shortcut = self.layer1_1x1(x)
        x = main + shortcut
        x = self.layer2(x) + x
        return self.layer2_1(x)

In [None]:
# Build the network once and place it on the GPU when one is available.
# Without CUDA the model stays on the CPU rather than crashing on .cuda()
# right after printing the warning.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type != 'cuda':
    pplus('Turn ON your GPU')
net = Network().to(device)
print(net)

## Create a Training Regime

In [None]:
# Adam with beta1 lowered to 0.5; the SGD alternative is kept for reference:
# optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.85, weight_decay=1e-4)
optimizer = optim.Adam(params=net.parameters(), lr=lr, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

In [None]:
def train(epoch):
    """Run one training epoch over ``trainloader`` and return the mean loss.

    Uses the module-level ``net``, ``optimizer``, ``criterion`` and
    ``trainloader``. Every 10th batch the batch loss and batch accuracy
    (in percent) are logged through ``pplus``.

    NOTE(review): this function shares its name with the training dataset
    variable ``train``; defining it rebinds that name.

    Args:
        epoch: Epoch index, used only in the progress log.

    Returns:
        Mean of the per-batch losses of this epoch as a float.
    """
    net.train()
    # Follow whichever device the model lives on instead of forcing CUDA.
    device = next(net.parameters()).device
    train_loss = 0.0
    counter = 0
    for batch_idx, (data, targets) in enumerate(trainloader):
        data, targets = data.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss
        counter += 1

        if batch_idx % 10 == 0:
            _, predicted = torch.max(outputs.detach(), 1)
            correct = (predicted == targets).sum().item()
            # Accuracy is relative to this batch's size (``targets``), not
            # to the unrelated global ``labels`` table.
            accuracy = 100.0 * correct / targets.size(0)
            pplus('Train Epoch:', epoch, batch_idx*len(data), '/', len(trainloader.dataset),
                  '|\n Train Loss:', round(batch_loss, 5),
                  '|\n Accuracy:', round(accuracy, 5))
    return train_loss/counter

In [None]:
train_stack = []  # collects the value returned by train() for each epoch

In [None]:

# Train for 200 epochs and redraw the loss curve after each one.
for epoch in range(200):
    epoch_loss = train(epoch)
    train_stack.append(epoch_loss)
    plt.plot(train_stack)
    plt.title('Train Loss')
    plt.xlabel('Epochs --->')
    plt.ylabel('Loss --->')
    plt.show()
#     test_stack.append(test(epoch))

In [None]:
# Final training-loss curve over all recorded epochs.
plt.plot(train_stack)
plt.title('Train Loss')
plt.ylabel('Loss --->')
plt.xlabel('Epochs --->')
plt.show()

In [None]:
# Collect the loss history in a one-column table.
maza = dict(train_loss=train_stack)
df = pd.DataFrame(data=maza)

In [None]:
# Write the loss table to log.csv in the working directory.
df.to_csv('log.csv')

In [None]:
# Dump the raw per-epoch loss values.
print(train_stack)