In [None]:
import sys

# Base directory of the Kaggle environment; the data and checkpoint paths
# used later in this notebook are built relative to it. It is also appended
# to sys.path so Python modules placed under it can be imported.
root = '/kaggle/'
sys.path.append(root)

'''
Import Libraries
'''

import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image

# from einops import rearrange, repeat
# from einops.layers.torch import Rearrange

import matplotlib.pyplot as plt

import time
import copy


In [None]:
''' 
Dataset Class
'''

class CSVDataset(Dataset):
    """Image dataset driven by a DataFrame whose first column holds file names.

    Args:
        annotations_df: DataFrame; column 0 is the image file name relative
            to ``img_dir``. Any further columns are currently ignored.
        img_dir: directory containing the image files.
        transform: optional callable applied to every image.
        target_transform: stored for API compatibility; not used because
            labels are not returned by this dataset.
        aug: selects the calling convention of ``transform``. When True the
            transform is called albumentations-style (``transform(image=ndarray)``
            and the result is read from the ``'image'`` key); when False it is
            called directly on the PIL image.

    Each item is a dict ``{"image": image}``.
    """

    def __init__(self, annotations_df, img_dir, transform=None, target_transform=None, aug=True):
        self.img_labels = annotations_df
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.aug = aug

    def __len__(self):
        """Return the number of rows in the annotation frame."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image at row ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to be read and returns an independent image, so the
        # underlying file can be closed deterministically right away.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        if self.transform:
            if self.aug:
                # albumentations works on NumPy arrays and returns a dict.
                image = self.transform(image=np.array(image))['image']
            else:
                image = self.transform(image)

        return {"image": image}


In [None]:
'''
Model definitions: ResNet, MobileNetV2, ViT and Ensemble
'''

# Resnet
class ResNet(nn.Module):
    """ResNet-style classifier assembled from ``ResBlock`` residual blocks.

    Args:
        layers: sequence of four ints, the number of residual blocks in each
            of the four stages (e.g. ``[2, 2, 2, 2]``).
        dropout: dropout probability; when ``<= 0`` no dropout layer is created.
        num_classes: size of the output layer. Defaults to 10, the value that
            was previously hard-coded, so existing checkpoints still load.
    """

    def __init__(self, layers, dropout=0.0, num_classes=10):
        super(ResNet, self).__init__()
        # Channel count entering the next stage; make_layer updates it.
        self.inplanes = 64
        # Stem: 7x7 stride-2 convolution followed by a stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, padding=3, stride=2, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self.make_layer(64, layers[0])
        self.layer2 = self.make_layer(128, layers[1], stride=2)
        self.layer3 = self.make_layer(256, layers[2], stride=2)
        self.layer4 = self.make_layer(512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(dropout) if dropout > 0.0 else None

    def make_layer(self, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        Only the first block may change resolution (``stride``) or channel
        count; it then receives a 1x1 projection so the shortcut matches the
        main branch.
        """
        downsample = None
        # A projection is needed whenever the shortcut and the main branch
        # would differ in shape: a spatial stride OR a channel change. The
        # channel check is a no-op for the stages built in __init__ but keeps
        # the residual addition valid for other configurations.
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

        layers = [ResBlock(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(ResBlock(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        # NOTE(review): dropout is applied to the logits, after the final
        # linear layer. It is left in place so training-time behaviour stays
        # the same as before; in eval mode nn.Dropout is the identity.
        if self.dropout is not None:
            out = self.dropout(out)

        return out


class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when None the input itself is used.
    """

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(
            inplanes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        # Main branch: conv -> bn -> relu -> conv -> bn.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class MobileNetV2(nn.Module):
    """MobileNetV2-style classifier built from ``InvertedResidual`` blocks.

    Args:
        width_mult: multiplier applied to every channel count; results are
            rounded to a multiple of 8 by ``_make_divisible``.
        dropout: dropout probability applied before the final linear layer.
        num_classes: size of the output layer. Defaults to 10, the value that
            was previously hard-coded, so existing checkpoints still load.
    """

    def __init__(self, width_mult=1.0, dropout=0.0, num_classes=10):
        super(MobileNetV2, self).__init__()
        # One row per stage: [expand_ratio t, output channels c,
        # number of blocks n, stride of the first block s].
        inverted_residual_setting = [
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ]

        input_channel = 32
        last_channel = 1280

        input_channel = _make_divisible(input_channel * width_mult, 8)
        # NOTE(review): last_channel is scaled by both max(1.0, width_mult) and
        # width_mult. This is kept exactly as is because it determines the
        # shapes of the last conv and the classifier, and therefore the
        # tensor shapes stored in checkpoints trained with this class.
        last_channel = _make_divisible(last_channel * max(1.0, width_mult) * width_mult, 8)
        features = [ConvBNReLU(3, input_channel, stride=2)]

        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, 8)
            for i in range(n):
                # Only the first block of a stage uses the stage stride.
                stride = s if i == 0 else 1
                features.append(InvertedResidual(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel

        features.append(ConvBNReLU(input_channel, last_channel, kernel_size=1))
        self.features = nn.Sequential(*features)

        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(last_channel, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = self.features(x)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise conv,
    then a 1x1 projection followed by batch-norm (no activation).

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the depthwise convolution.
        expand_ratio: factor by which the input channels are expanded; when it
            equals 1 the expansion layer is omitted.
    """

    def __init__(self, in_planes, out_planes, stride, expand_ratio):
        super().__init__()
        expanded = int(round(in_planes * expand_ratio))
        # The skip connection is only valid when the block preserves shape.
        self.use_res_connect = (stride == 1) and (in_planes == out_planes)

        # Pointwise expansion (skipped when there is nothing to expand).
        stages = [ConvBNReLU(in_planes, expanded, kernel_size=1)] if expand_ratio != 1 else []
        # Depthwise 3x3: one group per channel.
        stages.append(ConvBNReLU(expanded, expanded, stride=stride, groups=expanded))
        # Pointwise projection back to out_planes.
        stages.append(nn.Conv2d(expanded, out_planes, kernel_size=1, bias=False))
        stages.append(nn.BatchNorm2d(out_planes))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the input back when the skip connection is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)


class ConvBNReLU(nn.Module):
    """Bias-free convolution followed by batch-norm and ReLU6.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        groups: number of convolution groups.
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super().__init__()
        conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)  # necessary? consider switching to relu
        self.layers = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Run ``x`` through conv -> batch-norm -> ReLU6."""
        return self.layers(x)


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor

    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)  # rounds numbers in range [num-divisor/2, num+divisor/2-1] to num, where num is a multiple of divisor

    if new_v < 0.9 * v:
        new_v += divisor  # ensures that round down does not decrease v by more than 10%

    return new_v

'''
VIT
'''
# helpers

def pair(t):
    """Return ``t`` unchanged if it is a tuple, otherwise the 2-tuple ``(t, t)``."""
    if isinstance(t, tuple):
        return t
    return (t, t)

# classes

class PreNorm(nn.Module):
    """Wrap a callable so its input is layer-normalised first.

    Args:
        dim: size of the last dimension, passed to ``nn.LayerNorm``.
        fn: callable invoked on the normalised input.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(LayerNorm(x), **kwargs)``."""
        normalised = self.norm(x)
        return self.fn(normalised, **kwargs)

class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output feature size.
        hidden_dim: feature size between the two linear layers.
        dropout: dropout probability used after the activation and after the
            second linear layer.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),   # expand
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),   # project back
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        out = self.net(x)
        return out

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: feature size of the input (and output) tokens.
        heads: number of attention heads.
        dim_head: feature size of each head.
        dropout: dropout probability applied after the output projection.

    The output projection is replaced by an identity when a single head of
    size ``dim`` is used, since no projection is needed in that case.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        # One linear layer produces queries, keys and values concatenated.
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        """Attend over the token axis of ``x`` (batch, tokens, dim) and return
        a tensor of the same shape."""
        b, n, _ = x.shape
        h = self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)
        d = qkv[0].shape[-1] // h

        # Split the feature axis into (heads, dim_head) and move heads in
        # front of the token axis: (b, n, h*d) -> (b, h, n, d). This uses
        # plain torch ops because einops is not imported in this notebook.
        q, k, v = (t.reshape(b, n, h, d).transpose(1, 2) for t in qkv)

        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale

        attn = self.attend(dots)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        # Merge the heads back: (b, h, n, d) -> (b, n, h*d).
        out = out.transpose(1, 2).reshape(b, n, h * d)
        return self.to_out(out)

class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm encoder layers, each an attention sub-layer
    followed by a feed-forward sub-layer, both with residual connections.

    Args:
        dim: token feature size.
        depth: number of layers.
        heads: attention heads per layer.
        dim_head: feature size of each attention head.
        mlp_dim: hidden size of the feed-forward sub-layer.
        dropout: dropout probability passed to both sub-layers.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()

        def build_layer():
            # Attention is created before the feed-forward part, per layer.
            attention = PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))
            feed_forward = PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            return nn.ModuleList([attention, feed_forward])

        self.layers = nn.ModuleList([build_layer() for _ in range(depth)])

    def forward(self, x):
        """Pass ``x`` through every layer, adding each sub-layer's output to its input."""
        for layer in self.layers:
            attention, feed_forward = layer[0], layer[1]
            x = attention(x) + x
            x = feed_forward(x) + x
        return x


class PatchRearrange(nn.Module):
    """Parameter-free layer that cuts an image batch into flattened patches.

    Maps ``(b, c, h*p1, w*p2)`` to ``(b, h*w, p1*p2*c)``: patches are ordered
    row-major over the image, and inside each patch the values are ordered
    by patch row, then patch column, then channel. It stands in for the
    einops ``Rearrange`` layer, which is not imported in this notebook.
    """

    def __init__(self, patch_height, patch_width):
        super().__init__()
        self.patch_height = patch_height
        self.patch_width = patch_width

    def forward(self, img):
        b, c, height, width = img.shape
        p1, p2 = self.patch_height, self.patch_width
        h, w = height // p1, width // p2
        x = img.reshape(b, c, h, p1, w, p2)
        x = x.permute(0, 2, 4, 3, 5, 1)  # -> (b, h, w, p1, p2, c)
        return x.reshape(b, h * w, p1 * p2 * c)


class VIT(nn.Module):
    """Vision Transformer classifier.

    Args (keyword-only):
        image_size: int or (height, width) of the input images.
        patch_size: int or (height, width) of each patch; must divide the image size.
        num_classes: size of the output layer.
        dim: token embedding size.
        depth: number of transformer layers.
        heads: attention heads per layer.
        mlp_dim: hidden size of the feed-forward sub-layers.
        pool: ``'cls'`` to classify from the class token, ``'mean'`` to
            average all tokens.
        channels: number of image channels.
        dim_head: feature size of each attention head.
        dropout: dropout probability inside the transformer.
        emb_dropout: dropout probability applied to the embedded tokens.
    """

    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        patch_dim = channels * patch_height * patch_width
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        # The rearrange layer has no parameters, so the linear layer keeps
        # its state_dict key 'to_patch_embedding.1.*'.
        self.to_patch_embedding = nn.Sequential(
            PatchRearrange(patch_height, patch_width),
            nn.Linear(patch_dim, dim),
        )

        # One extra position for the class token.
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, img):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``img``."""
        x = self.to_patch_embedding(img)
        b, n, _ = x.shape

        # Broadcast the single learned class token over the batch
        # (replaces einops.repeat, which is not imported).
        cls_tokens = self.cls_token.expand(b, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)

        x = self.transformer(x)

        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]

        x = self.to_latent(x)
        return self.mlp_head(x)

class Ensemble(nn.Module):
    """Combine three models by summing their outputs element-wise.

    Args:
        modelA, modelB, modelC: callables that accept the same input and
            return outputs of compatible shape.
    """

    def __init__(self, modelA, modelB, modelC):
        super().__init__()
        self.modelA, self.modelB, self.modelC = modelA, modelB, modelC

    def forward(self, x):
        """Return ``modelA(x) + modelB(x) + modelC(x)``."""
        members = (self.modelA, self.modelB, self.modelC)
        first, second, third = [member(x) for member in members]
        return first + second + third


In [None]:
'''
Auxiliary Functions
'''

def get_model(model, width_mult=1.0, dropout=0.2):
    """Instantiate a network by name.

    Args:
        model: one of ``'base'``, ``'resnet'``, ``'mobilenet'`` or ``'VIT'``.
        width_mult: channel multiplier; only used by ``'mobilenet'``.
        dropout: dropout probability; used by ``'resnet'`` and ``'mobilenet'``.

    Returns:
        A freshly constructed, untrained ``nn.Module``.

    Raises:
        NotImplementedError: if ``model`` is not one of the known names.
    """
    # torchvision's stock ResNet-18 with its default arguments.
    if model == 'base':
        return models.resnet18()

    # Local ResNet with two blocks per stage.
    if model == 'resnet':
        return ResNet([2,2,2,2], dropout)

    if model == 'mobilenet':
        return MobileNetV2(width_mult=width_mult, dropout=dropout)

    # The ViT configuration is fixed; width_mult and dropout are not forwarded.
    if model == 'VIT':
        return VIT(image_size=(384,384), patch_size=16, num_classes=5, dim=512, depth=6, heads=12, mlp_dim=1024)

    raise NotImplementedError(f'Model [{model}] not implemented')

def get_transforms(aug=True, p=0.3):
    """Build the training and validation preprocessing pipelines.

    Args:
        aug: when True, return albumentations pipelines (random 288x288 crop
            resized to 384x384 plus random shift/scale/rotate,
            brightness/contrast and horizontal flip for training; centre crop
            and resize for validation). When False, return torchvision
            pipelines that convert to a tensor and take a 384x384 random
            (train) or centre (val) crop.
        p: probability of each random augmentation; only used when ``aug`` is True.

    Returns:
        ``(train_transforms, val_transforms)``. Both normalise every channel
        with mean 0.5 and std 0.5.
    """
    mean = (0.5, 0.5, 0.5)
    std = (0.5, 0.5, 0.5)

    if not aug:
        # torchvision path: operates on PIL images / tensors.
        train_pipeline = transforms.Compose([
            transforms.ToTensor(),
            transforms.RandomCrop((384,384)),
            transforms.Normalize(mean, std),
        ])
        val_pipeline = transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop((384,384)),
            transforms.Normalize(mean, std),
        ])
        return train_pipeline, val_pipeline

    # albumentations path: operates on NumPy arrays.
    train_steps = [
        A.RandomCrop(288, 288),
        A.Resize(384, 384),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=p),
        A.RandomBrightnessContrast(p=p),
        A.HorizontalFlip(p=p),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ]
    train_pipeline = A.Compose(train_steps)

    val_steps = [
        A.CenterCrop(288, 288),
        A.Resize(384, 384),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ]
    val_pipeline = A.Compose(val_steps)

    return train_pipeline, val_pipeline
    
def save_model(net, name, epoch, save_dir):
    """Write ``net``'s state_dict to ``<save_dir>/<epoch>_net.pth``.

    ``name`` is accepted for call-site symmetry with ``load_model`` but is
    not part of the file name.
    """
    checkpoint_file = f'{epoch}_net.pth'
    torch.save(net.state_dict(), os.path.join(save_dir, checkpoint_file))
    
def load_model(net, name, epoch, save_dir, device):
    """Load ``<save_dir>/<epoch>_net.pth`` into ``net`` and return ``net``.

    Args:
        net: module whose parameters are overwritten in place.
        name: unused; kept so existing call sites keep working.
        epoch: checkpoint tag used in the file name (e.g. ``'best'``).
        save_dir: directory containing the checkpoint.
        device: passed to ``torch.load`` as ``map_location``.

    Loading stays non-strict, but any key that is missing from the checkpoint
    or not expected by ``net`` is reported with a warning: otherwise a
    checkpoint from a different architecture would load "successfully" and
    leave randomly initialised weights behind without any sign of it.
    """
    import warnings

    path = os.path.join(save_dir, f'{epoch}_net.pth')
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    state_dict = torch.load(path, map_location=device)
    result = net.load_state_dict(state_dict, strict=False)

    # getattr guards against torch versions where load_state_dict returns None.
    missing = list(getattr(result, 'missing_keys', []))
    unexpected = list(getattr(result, 'unexpected_keys', []))
    if missing or unexpected:
        warnings.warn(
            f'Checkpoint {path} does not fully match the model: '
            f'missing keys {missing}, unexpected keys {unexpected}'
        )
    return net
    
def print_and_save_args(args, path):
    """Print the option dict as an aligned table and write the same table to ``path``.

    Each entry becomes one line: the key right-aligned to 15 characters, a
    colon, and the value left-aligned to 10 characters.
    """
    rows = [f'{str(k):>15}: {str(v):<10}\n' for k, v in args.items()]
    message = "".join(rows)

    padding = ' '*20
    print(padding + '[OPTIONS]' + padding)
    print(message)

    with open(path, 'w') as f:
        f.write(message)


In [None]:
# Inference configuration: one entry per ensemble member (suffix 1/2/3).
args = {
    'name': 'mobilenet_384_randomcrop_width_mult_1.8',  # MUST SET EXPERIMENT NAME BEFORE TRAINING (for saving model)
    'batch_size': 32,
    'width_mult1': 1.8,
    'width_mult2': 1.0,
    'width_mult3': 1.0,
    'dropout': 0.0,
    'aug': False,
    'model1': 'mobilenet',
    'model2': 'mobilenet',
    'model3': 'resnet',
    'gpu_id': 0,
}

assert args['name'] is not None, "Must set experiment name before training"

# Input locations: competition data plus one checkpoint directory per model.
data_dir = os.path.join(root, 'input/cassava-leaf-disease-classification/')
save_dir1 = os.path.join(root, 'input/m-18-aug')
save_dir2 = os.path.join(root, 'input/mnoaug')
save_dir3 = os.path.join(root, 'input/r-augg')

# Every file in the test image folder becomes one row of the submission frame.
img_dir = os.path.join(data_dir, 'test_images')
test_pd = pd.DataFrame()
test_pd['image_id'] = list(os.listdir(img_dir))
num_test = len(test_pd)

# Only the validation-style (deterministic) pipeline is needed for testing.
_, test_transforms = get_transforms(args['aug'])

test_dataset = CSVDataset(test_pd, img_dir, transform=test_transforms, aug=args['aug'])
test_dataloader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

if torch.cuda.is_available():
    device = 'cuda:' + str(args['gpu_id'])
else:
    device = 'cpu'
print(f'test images: {num_test} \t device: {device}')

# Build each member on the target device, then load its 'best' checkpoint.
net1 = load_model(
    get_model(args['model1'], width_mult=args['width_mult1'], dropout=args['dropout']).to(device),
    args['name'], 'best', save_dir1, device,
)

net2 = load_model(
    get_model(args['model2'], width_mult=args['width_mult2'], dropout=args['dropout']).to(device),
    args['name'], 'best', save_dir2, device,
)

net3 = load_model(
    get_model(args['model3'], width_mult=args['width_mult3'], dropout=args['dropout']).to(device),
    args['name'], 'best', save_dir3, device,
)

net = Ensemble(net1, net2, net3)

In [None]:
'''
Test
'''
# Count the trainable parameters of the whole ensemble (all three member
# networks are sub-modules of `net`).
num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
def human_format(num):
    """Format a number with two decimals and a K/M/G/T/P suffix.

    The value is divided by 1000 until its magnitude drops below 1000 or the
    largest suffix ('P') is reached; values beyond that stay expressed in
    'P' instead of running past the end of the suffix list.
    """
    units = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Stop at the last available suffix to avoid an IndexError on huge values.
    while abs(num) >= 1000 and magnitude < len(units) - 1:
        magnitude += 1
        num /= 1000.0
    return '%.2f%s' % (num, units[magnitude])

print(f'Number of total parameters: {human_format(num_params)}')

# Predict a label for every test image and write the submission file.
# eval() is required in addition to no_grad(): it makes BatchNorm use its
# stored running statistics (instead of per-batch statistics, which would
# also overwrite the running averages) and turns Dropout into a no-op.
net.eval()

pred_list = []
with torch.no_grad():
    for data in test_dataloader:
        imgs = data['image'].float().to(device)
        outputs = net(imgs)
        # The ensemble returns summed class scores; take the top class.
        pred_list += outputs.argmax(dim=1).tolist()

# Explicit integer dtype keeps the column integral even with no test images.
test_pd['label'] = np.array(pred_list, dtype=np.int64)
print(test_pd.head())
test_pd.to_csv('submission.csv', index=False)