Changelog - 
* pretrainedmodels by Cadene repo
* seresnext50_32x4d
* added custom head
* added code for mish
* adaptiveconcatpool2d/bn_drop
* albumentations (SSR,normalize imagenet weights)
* conv2d model 3channel
* nn.SmoothL1Loss
* sigmoid/relu -> mish (?)


Changes to try/make:
* over9k / ranger optim
* augmix
* 5-fold CV
* GeM pooling for head


In [None]:
# Notebook shell command: install the pretrainedmodels 0.7.4 package from a
# local directory under ../input; pip's standard output is discarded.
!pip install ../input/pretrainedmodels/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4/ > /dev/null

In [None]:
import pandas as pd
import pickle
import pretrainedmodels
import time
import torchvision
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm

from PIL import Image, ImageFile
from torch.utils.data import Dataset
import torch
import torch.optim as optim
from torchvision import transforms
from torch.optim import lr_scheduler
import os
from albumentations import (
    PadIfNeeded,
    HorizontalFlip,
    VerticalFlip,    
    CenterCrop,    
    Crop,
    Rotate,
    Compose,
    Transpose,
    RandomRotate90,
    ElasticTransform,
    GridDistortion, 
    OpticalDistortion,
    RandomSizedCrop,
    OneOf,
    #MultiplicativeNoise,
    #CoarseDropout,
    CLAHE,
    RandomBrightnessContrast,    
    Cutout,
    RandomGamma,
    Rotate,
    ShiftScaleRotate ,
    GaussNoise,
    Blur,
    #GlassBlur,
    Normalize,
    MotionBlur,
    MedianBlur,   
    IAAPiecewiseAffine,
    GaussianBlur
)

# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs on a machine without a GPU instead of failing at the first `.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Tell PIL to decode image files that are truncated rather than raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
import numpy as np

In [None]:
import torch.nn as nn
import torch.nn.functional as F  #(uncomment if needed,but you likely already have it)

#Mish - "Mish: A Self Regularized Non-Monotonic Neural Activation Function"
#https://arxiv.org/abs/1908.08681v1
#implemented for PyTorch / FastAI by lessw2020 
#github: https://github.com/lessw2020/mish

class Mish(nn.Module):
    """Mish activation: computes ``x * tanh(softplus(x))`` element-wise.

    The module has no parameters or buffers.
    """

    def forward(self, x):
        # Deliberately a single expression: the original author measured the
        # inlined form as faster than going through a temporary.
        return x * F.softplus(x).tanh()

In [None]:
def convert_relu_to_mish(model):
    """Recursively replace every `nn.ReLU` submodule of `model` with `Mish`, in place.

    Children that are not `nn.ReLU` are descended into; nothing is returned.
    """
    for child_name, child in model.named_children():
        if not isinstance(child, nn.ReLU):
            # Not a ReLU itself: look for ReLUs further down this branch.
            convert_relu_to_mish(child)
            continue
        setattr(model, child_name, Mish())

In [None]:
def convert_sigmoid_to_mish(model):
    """Recursively replace every `nn.Sigmoid` submodule of `model` with `Mish`, in place.

    Children that are not `nn.Sigmoid` are descended into; nothing is returned.
    """
    for child_name, child in model.named_children():
        if isinstance(child, nn.Sigmoid):
            setattr(model, child_name, Mish())
        else:
            # Recurse with *this* function. The previous version recursed via
            # convert_relu_to_mish, so nested Sigmoids were never converted and
            # nested ReLUs were converted instead.
            convert_sigmoid_to_mish(child)


In [None]:
class AdaptiveConcatPool2d(nn.Module):
    """Layer that concats `AdaptiveMaxPool2d` and `AdaptiveAvgPool2d` outputs.

    Both pools are applied to the same input and their results are
    concatenated along dim 1 (max-pool first, then avg-pool), so the size of
    dim 1 doubles.

    Args:
        sz: target output size handed to both adaptive pools. ``None`` (the
            default) or any other falsy value is treated as ``1``.
    """

    def __init__(self, sz=None):
        super().__init__()
        # `sz or 1`: fall back to a 1x1 output when no size is given.
        self.output_size = sz or 1
        self.ap = nn.AdaptiveAvgPool2d(self.output_size)
        self.mp = nn.AdaptiveMaxPool2d(self.output_size)

    def forward(self, x):
        return torch.cat([self.mp(x), self.ap(x)], 1)


In [None]:
class Flatten(nn.Module):
    """Reshape the input with `view`.

    With ``full=False`` (default) the first dimension is kept and everything
    else is collapsed: ``(N, ...) -> (N, -1)``. With ``full=True`` the whole
    tensor is collapsed to rank 1.
    """

    def __init__(self, full: bool = False):
        super().__init__()
        self.full = full

    def forward(self, x):
        if self.full:
            return x.view(-1)
        return x.view(x.size(0), -1)

In [None]:
def bn_drop_lin(n_in: int, n_out: int, actn: nn.Module, bn: bool = True, p: float = 0.):
    """Build a list of layers: BatchNorm1d -> Dropout -> Linear -> activation.

    Args:
        n_in: input feature count (used by BatchNorm1d and Linear).
        n_out: output feature count of the Linear layer.
        actn: module appended after the Linear layer; skipped when ``None``.
        bn: include the leading ``BatchNorm1d(n_in)`` when true.
        p: dropout probability; no Dropout layer is added when ``p == 0``.

    Returns:
        A plain Python list of modules (not an ``nn.Sequential``).
    """
    layers = []
    if bn:
        layers.append(nn.BatchNorm1d(n_in))
    if p != 0:
        layers.append(nn.Dropout(p))
    layers.append(nn.Linear(n_in, n_out))
    if actn is not None:
        layers.append(actn)
    return layers

In [None]:
class RetinopathyDatasetTrain(Dataset):
    """Training dataset that yields ``{'image', 'labels'}`` dicts.

    Rows are read from a CSV with an ``id_code`` column (image file stem) and
    a ``diagnosis`` column (label). Each image is loaded from
    ``<image_dir>/<id_code>.png``, forced to 3-channel RGB, resized, optionally
    passed through an albumentations-style transform and converted to a
    tensor with ``transforms.ToTensor()``.

    Args:
        csv_file: path of the CSV listing ``id_code`` / ``diagnosis``.
        transform: callable invoked as ``transform(image=ndarray)`` returning
            a dict with an ``'image'`` entry, or a falsy value to skip it.
        image_dir: directory containing the ``.png`` files.
        image_size: ``(width, height)`` passed to ``PIL.Image.resize``.
    """

    def __init__(self, csv_file, transform,
                 image_dir='../input/aptos2019-blindness-detection/train_images',
                 image_size=(224, 224)):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.data.loc[idx, 'id_code'] + '.png')
        # Context manager closes the underlying file as soon as the pixels are
        # copied out. convert('RGB') guarantees three channels even if a file
        # is stored as grayscale, palette or RGBA, and is done before resizing
        # so interpolation happens on real colour values.
        with Image.open(img_name) as img:
            image = img.convert('RGB').resize(self.image_size, resample=Image.BILINEAR)
        image = np.asarray(image, dtype=np.uint8)
        if self.transform:
            result = self.transform(image=image)
            image = result['image']
        label = torch.tensor(self.data.loc[idx, 'diagnosis'])
        return {'image': transforms.ToTensor()(image),
                'labels': label
                }

In [None]:
class Head(nn.Module):
    """Head applied to the backbone's final feature map.

    Layout: concat max/avg pool -> Mish -> flatten -> [BN, Dropout, Linear(nc*2, 512), Mish]
    -> [BN, Dropout, Linear(512, n)].

    Args:
        nc: channel count of the incoming feature map (doubled by the concat pool).
        n: number of outputs of the final Linear layer.
        ps: dropout probability used in both bn/drop/linear groups.
    """

    def __init__(self, nc, n, ps=0.5):
        super().__init__()
        pooling = [AdaptiveConcatPool2d(1), Mish(), Flatten()]
        hidden = bn_drop_lin(nc * 2, 512, Mish(), True, ps)
        output = bn_drop_lin(512, n, None, True, ps)
        self.fc = nn.Sequential(*pooling, *hidden, *output)
        self._init_weight()

    def _init_weight(self):
        # NOTE(review): only Conv2d / BatchNorm2d are matched here, while the
        # layers built in __init__ are pooling, Mish, Flatten, BatchNorm1d,
        # Dropout and Linear, so this loop currently changes nothing. Confirm
        # whether Linear / BatchNorm1d were the intended targets.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                torch.nn.init.kaiming_normal_(module.weight)
                continue
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1.0)
                module.bias.data.zero_()

    def forward(self, x):
        return self.fc(x)

In [None]:
# Backbone constructor looked up by name in the pretrainedmodels package;
# used as the default `arch` of the model class below.
arch = getattr(pretrainedmodels, 'se_resnext50_32x4d')

In [None]:
class DRModel(nn.Module):
    """Backbone from `arch` with a single-output `Head` in place of its classifier.

    The backbone's ``layer0`` .. ``layer4`` stages are reused; ``layer0`` is
    rebuilt as conv -> bn1 -> relu1 -> pool around a 3-channel 7x7 stem conv.

    Args:
        arch: callable returning the backbone; called with
            ``pretrained='imagenet'`` or ``pretrained=None``.
        pre: load ImageNet weights for the backbone when true.
    """

    def __init__(self, arch = arch , pre=True):
        super(DRModel,self).__init__()
        m = arch(pretrained='imagenet') if pre else arch(pretrained=None)
        conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if pre:
            # Previously the stem conv stayed randomly initialised even with
            # pre=True, discarding the pretrained first-layer filters while
            # still keeping the pretrained bn1 that was fitted to them.
            # Copy the backbone's conv1 weights when the shapes agree.
            pretrained_w = m.layer0.conv1.weight
            if pretrained_w.shape == conv.weight.shape:
                conv.weight = nn.Parameter(pretrained_w.detach().clone())
        self.layer0 = nn.Sequential(conv, m.layer0.bn1, m.layer0.relu1, m.layer0.pool)
        self.layer1 = m.layer1
        self.layer2 = m.layer2
        self.layer3 = m.layer3
        self.layer4 = nn.Sequential(m.layer4[0], m.layer4[1], m.layer4[2])

        # Channel count of the last feature map, read from the final block's
        # SE module; this attribute path changes with the architecture.
        nc = self.layer4[-1].se_module.fc2.out_channels
        self.head = Head(nc,1)

    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.head(x)

        return x

In [None]:
# Build the network with ImageNet-pretrained backbone weights and move it to
# the selected device in one step.
model = DRModel(pre=True).to(device)

In [None]:
# Bare expression: as the last line of a notebook cell this displays the
# module's repr (its layer tree).
model

In [None]:
# model = torchvision.models.resnet101(pretrained=False)
# model.load_state_dict(torch.load("../input/pytorch-pretrained-models/resnet101-5d3b4d8f.pth"))
# num_features = model.fc.in_features
# model.fc = nn.Linear(2048, 1)

# model = model.to(device)

In [None]:
# Training-time augmentation pipeline (albumentations).
train_transforms = Compose([
    # Applied to every sample (p=1): random shift, scale and rotation within
    # the given limits.
    ShiftScaleRotate(
        shift_limit=0.2,
        scale_limit=0.2,
        rotate_limit=180,
        p=1,
    ),
    # NOTE(review): mean/std are single scalars (the parentheses do not make
    # tuples), whereas the changelog mentions ImageNet normalisation -
    # confirm these statistics are the intended ones for 3-channel input.
    Normalize(
        mean=(0.0692),
        std=(0.2051),
    ),
])

In [None]:
# Dataset over the training CSV, with the augmentation pipeline attached.
train_dataset = RetinopathyDatasetTrain(
    transform=train_transforms,
    csv_file='../input/aptos2019-blindness-detection/train.csv',
)
# Shuffled mini-batches of 64, loaded by 4 worker processes.
data_loader = torch.utils.data.DataLoader(
    train_dataset,
    num_workers=4,
    shuffle=True,
    batch_size=64,
)

# Adam over all model parameters; StepLR changes the learning rate every
# 10 scheduler steps (gamma left at the library default).
optimizer = optim.Adam(params=model.parameters(), lr=2e-3)
scheduler = lr_scheduler.StepLR(optimizer, 10)

In [None]:
# Train the model as a regressor on the diagnosis label with SmoothL1 loss,
# then save both the state dict and a pickle of the whole module.
since = time.time()
criterion = nn.SmoothL1Loss()
num_epochs = 25
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model.train()
    running_loss = 0.0
    tk0 = tqdm(data_loader, total=len(data_loader))
    counter = 0
    for d in tk0:
        inputs = d["image"].to(device, dtype=torch.float)
        # Reshape labels to (batch, 1) so they line up with the single-output head.
        labels = d["labels"].view(-1, 1).to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        running_loss += loss.item()
        counter += 1
        # The criterion already averages over the batch, so the running mean
        # is per batch. Dividing by batch_size again (as before) under-reported
        # the loss and disagreed with the epoch summary below.
        tk0.set_postfix(loss=(running_loss / counter))
    # Advance the LR schedule only after the epoch's optimizer steps. Stepping
    # it at the top of the epoch (as before) runs the scheduler ahead of the
    # optimizer and shifts the decay one epoch early.
    scheduler.step()
    epoch_loss = running_loss / len(data_loader)
    print('Training Loss: {:.4f}'.format(epoch_loss))

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model.state_dict(), "model.pth")

Pkl_Filename = "Pickle_SeresModel.pkl"

# NOTE: this pickles the entire module object, so loading it later needs the
# same class definitions to be importable; "model.pth" above holds only the
# weights. Only unpickle this file from a trusted source.
with open(Pkl_Filename, 'wb') as file:
    pickle.dump(model, file)