In [1]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

# basic imports
import os
# Directory that receives submission.csv; created up front when it is missing.
# exist_ok=True makes the call idempotent and avoids the check-then-create race.
OUTPUT_DIR = './'
os.makedirs(OUTPUT_DIR, exist_ok=True)
    
import numpy as np
import pandas as pd
from timeit import default_timer as timer

# augmentations library
from albumentations.pytorch import ToTensorV2
from albumentations import (Compose, Normalize, Resize, CenterCrop)
import cv2

# DL library imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

# timm import
import timm

In [2]:
# --- inference pipeline settings ---
SEED = 42
NUM_CLASSES = 5
TEST_BATCH_SIZE = 16
SIZE = [512, 512]
NUM_WORKERS = 4
N_FOLDS = 5
DEBUG = False

# --- checkpoint and image locations ---
WGT_PATH = '../input/cassava-final-submission-weight-files'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'
TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'

# Train metadata: only the rows with index labels 0..50 (inclusive) are kept.
print('TRAIN_DF :')
train_df = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv').loc[0:50, :]
print(train_df.head())

# Test metadata comes from the sample submission file.
print('TEST_DF :')
test_df = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
print(test_df.head())

TRAIN_DF :
         image_id  label
0  1000015157.jpg      0
1  1000201771.jpg      3
2   100042118.jpg      1
3  1000723321.jpg      1
4  1000812911.jpg      3
TEST_DF :
         image_id  label
0  2216849948.jpg      4


In [3]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
def time_to_str(t, mode='min'):
    """Format a duration given in seconds as a short human-readable string.

    Args:
        t: Duration in seconds; it is truncated with ``int`` before formatting.
        mode: ``'min'`` renders ``'<h> hr <mm> min'``;
            ``'sec'`` renders ``'<m> min <ss> sec'``.

    Returns:
        The formatted duration string.

    Raises:
        NotImplementedError: If ``mode`` is neither ``'min'`` nor ``'sec'``.
    """
    if mode == 'min':
        # Whole minutes first, then split into hours and leftover minutes.
        hours, minutes = divmod(int(t) // 60, 60)
        return '%2d hr %02d min' % (hours, minutes)

    if mode == 'sec':
        minutes, seconds = divmod(int(t), 60)
        return '%2d min %02d sec' % (minutes, seconds)

    raise NotImplementedError(f"unsupported mode: {mode!r}")

In [5]:
# Per-model 5x5 matrices, moved to `device`. During inference each one is
# matrix-multiplied with the transposed averaged softmax output of the matching
# model family. NOTE(review): the `_cm` suffix suggests these are row-normalised
# confusion matrices from validation -- confirm provenance.

# Matrix paired with the EfficientNet-B4 models.
eff_b4_cm = torch.Tensor([
                    [0.60537775, 0.06900182, 0.02298651, 0.05426373, 0.24837019],
                    [0.04111679, 0.80631433, 0.02512565, 0.04704868, 0.08039455],
                    [0.01299001, 0.02053806, 0.78458023, 0.11525486, 0.06663684],
                    [0.00136804, 0.00455988, 0.01299577, 0.97013247, 0.01094384],
                    [0.06286295, 0.04928953, 0.04346354, 0.07761195, 0.76677203]
                    ]).to(device)

# Matrix paired with the EfficientNet-B3 models.
eff_b3_cm = torch.Tensor([
                    [0.5961527, 0.08094956, 0.02852915, 0.04967657, 0.24469201],
                    [0.03837288, 0.81133089, 0.02695422, 0.04065599, 0.08268602],
                    [0.00922081, 0.01676447, 0.79757024, 0.10561389, 0.07083059],
                    [0.00144406, 0.00471191, 0.01497171, 0.96960033, 0.00927199],
                    [0.05937232, 0.05239332, 0.04308121, 0.07217732, 0.77297584]
                    ]).to(device)

# Matrix paired with the vision-transformer models.
vit_cm = torch.Tensor([
                    [0.64025282, 0.08276329, 0.0229992 , 0.0496808 , 0.20430389],
                    [0.04842168, 0.80996103, 0.02421554, 0.04659624, 0.07080551],
                    [0.01550573, 0.02388885, 0.78248467, 0.11231897, 0.06580178],
                    [0.00197606, 0.00699218, 0.01276809, 0.97119584, 0.00706782],
                    [0.08032061, 0.05781742, 0.05005344, 0.08032664, 0.7314819 ]
                    ]).to(device)

# Matrix paired with the resnext50 "v1" models.
resnext50_v1_cm = torch.Tensor([
                [0.64858158, 0.06347609, 0.01746924, 0.03495962, 0.23551347],
                [0.04659624, 0.80539273, 0.02056048, 0.04704868, 0.08040187],
                [0.0117339 , 0.02179592, 0.7925309 , 0.10059209, 0.07334719],
                [0.00243199, 0.00547185, 0.01390783, 0.9677004 , 0.01048794],
                [0.069066  , 0.04656657, 0.04501543, 0.06558215, 0.77376985]
                ]).to(device)

# Matrix paired with the resnext50 "v2" models.
resnext50_v2_cm = torch.Tensor([
                [0.64864499, 0.07078172, 0.02022576, 0.04414662, 0.2162009 ],
                [0.05345391, 0.81360563, 0.01919271, 0.04157027, 0.07217747],
                [0.01466453, 0.02389148, 0.78247151, 0.1160987 , 0.06287378],
                [0.00182391, 0.00440805, 0.01063968, 0.97545261, 0.00767575],
                [0.07100474, 0.05199217, 0.0438549 , 0.07140212, 0.76174607]
                ]).to(device)

In [6]:
# Sanity check: print the shape of every per-model matrix, one per line.
print(
    *(matrix.shape for matrix in (resnext50_v1_cm, resnext50_v2_cm, vit_cm, eff_b3_cm, eff_b4_cm)),
    sep='\n',
)

torch.Size([5, 5])
torch.Size([5, 5])
torch.Size([5, 5])
torch.Size([5, 5])
torch.Size([5, 5])


In [7]:
class seResNext50Classifier(nn.Module):
    """timm backbone whose ``fc`` layer is replaced by a ``NUM_CLASSES``-way linear head."""

    def __init__(self, model_arch, pretrained=False):
        """Create the timm model ``model_arch`` and swap in a new ``fc`` head."""
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        # The new head keeps the input width of the stock classifier.
        head_in = self.model.fc.in_features
        self.model.fc = nn.Linear(head_in, NUM_CLASSES)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)
    
    
class ViTBase16Classifier(nn.Module):
    """timm backbone whose ``head`` layer is replaced by a ``NUM_CLASSES``-way linear head."""

    def __init__(self, model_arch, pretrained=False):
        """Create the timm model ``model_arch`` and swap in a new ``head``."""
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        # The new head keeps the input width of the stock classifier.
        head_in = self.model.head.in_features
        self.model.head = nn.Linear(head_in, NUM_CLASSES)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

    
class EfficientnetClassifier(nn.Module):
    """timm backbone whose ``classifier`` layer is replaced by a ``NUM_CLASSES``-way linear head."""

    def __init__(self, model_arch, pretrained=False):
        """Create the timm model ``model_arch`` and swap in a new ``classifier``."""
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        # The new head keeps the input width of the stock classifier.
        head_in = self.model.classifier.in_features
        self.model.classifier = nn.Linear(head_in, NUM_CLASSES)

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)
    
    
class MetaClassifier(nn.Module):
    """Small fully connected network: num_feature -> 32 -> 16 -> 8 -> num_class.

    Every hidden layer is followed by batch normalisation and ReLU. Dropout
    (p=0.4) is applied after the second and third hidden layers only.
    """

    def __init__(self, num_feature, num_class):
        """Build the layers.

        Args:
            num_feature: Width of the input feature vector.
            num_class: Number of output logits.
        """
        super().__init__()
        # Linear stack.
        self.layer_1 = nn.Linear(num_feature, 32)
        self.layer_2 = nn.Linear(32, 16)
        self.layer_3 = nn.Linear(16, 8)
        self.layer_out = nn.Linear(8, num_class)

        # Shared activation / regularisation modules.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.4)

        # One batch-norm per hidden width.
        self.batchnorm1 = nn.BatchNorm1d(32)
        self.batchnorm2 = nn.BatchNorm1d(16)
        self.batchnorm3 = nn.BatchNorm1d(8)

    def forward(self, x):
        """Return the ``num_class`` logits for the feature batch ``x``."""
        # Hidden block 1: no dropout.
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))
        # Hidden blocks 2 and 3: dropout after the activation.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))
        return self.layer_out(hidden)

In [8]:
# resnext models
# Per-fold checkpoint file names, resolved relative to WGT_PATH.
renext50_v1_ckpt =  ['resnext50_32x4d_baseline_fold0.pth', 'resnext50_32x4d_baseline_fold1.pth',
                     'resnext50_32x4d_baseline_fold2.pth', 'resnext50_32x4d_baseline_fold3.pth',
                     'resnext50_32x4d_baseline_fold4.pth']

renext50_v2_ckpt =  ['resnext50_baseline_v2_fold0.pth', 'resnext50_baseline_v2_fold1.pth',
                     'resnext50_baseline_v2_fold2.pth', 'resnext50_baseline_v2_fold3.pth',
                     'resnext50_baseline_v2_fold4.pth']

# A fresh network is built for every checkpoint. load_state_dict() overwrites
# parameters in place, so appending one shared instance after each load would
# leave every list entry (in both lists) pointing at the same object holding
# only the last-loaded weights, and the fold ensemble would collapse to one model.

# v1 model weights
resnext50_v1_nets = []
for ckpt_name in renext50_v1_ckpt:
    net = seResNext50Classifier(model_arch='resnext50_32x4d', pretrained=False)
    # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
    state = torch.load(f'{WGT_PATH}/{ckpt_name}', map_location='cpu')['model']
    net.load_state_dict(state, strict=True)
    net.to(device)
    resnext50_v1_nets.append(net)
print('load checkppoint ok! resnext50_v1_nets', len(resnext50_v1_nets))

# v2 model weights
resnext50_v2_nets = []
for ckpt_name in renext50_v2_ckpt:
    net = seResNext50Classifier(model_arch='resnext50_32x4d', pretrained=False)
    state = torch.load(f'{WGT_PATH}/{ckpt_name}', map_location='cpu')['model']
    net.load_state_dict(state, strict=True)
    net.to(device)
    resnext50_v2_nets.append(net)
print('load checkppoint ok! resnext50_v2_nets', len(resnext50_v2_nets))

# Drop the temporaries; the models stay alive through the lists above.
del net, state

load checkppoint ok! resnext50_v1_nets 5
load checkppoint ok! resnext50_v2_nets 5


In [9]:
# Vision transformer models ---
# Per-fold checkpoint file names, resolved relative to WGT_PATH.
vit_ckpt =  ['vit_baseline_fold0.pth', 'vit_baseline_fold1.pth', 'vit_baseline_fold2.pth',
             'vit_baseline_fold3.pth', 'vit_baseline_fold4.pth']

# A fresh network is built for every checkpoint. load_state_dict() overwrites
# parameters in place, so appending one shared instance after each load would
# leave all list entries pointing at the same object with only the last fold's weights.
vit_nets = []
for ckpt_name in vit_ckpt:
    net = ViTBase16Classifier(model_arch='vit_base_patch16_384', pretrained=False)
    # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
    state = torch.load(f'{WGT_PATH}/{ckpt_name}', map_location='cpu')['model']
    net.load_state_dict(state, strict=True)
    net.to(device)
    vit_nets.append(net)
print('load checkpoint ok! vit_nets', len(vit_nets))

# Drop the temporaries; the models stay alive through vit_nets.
del net, state

load checkpoint ok! vit_nets 5


In [10]:
# A fresh network is built for every checkpoint in both loops below.
# load_state_dict() overwrites parameters in place, so appending one shared
# instance after each load would leave all list entries pointing at the same
# object with only the last fold's weights.

# efficient_b3 net weights
eff_b3_ckpt =  ['eff_b3_baseline_fold0.pth', 'eff_b3_baseline_fold1.pth', 'eff_b3_baseline_fold2.pth', 
                'eff_b3_baseline_fold3.pth', 'eff_b3_baseline_fold4.pth']

efficient_b3_nets = []
for ckpt_name in eff_b3_ckpt:
    net = EfficientnetClassifier(model_arch='tf_efficientnet_b3_ns', pretrained=False)
    # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
    state = torch.load(f'{WGT_PATH}/{ckpt_name}', map_location='cpu')['model']
    net.load_state_dict(state, strict=True)
    net.to(device)
    efficient_b3_nets.append(net)
print('load checkpoint ok! efficient_b3_nets', len(efficient_b3_nets))

# efficient_b4 net weights    
eff_b4_ckpt =  ['eff_b4_baseline_fold0.pth', 'eff_b4_baseline_fold1.pth',
                'eff_b4_baseline_fold2.pth', 'eff_b4_baseline_fold3.pth',
                'eff_b4_baseline_fold4.pth']

efficient_b4_nets = []
for ckpt_name in eff_b4_ckpt:
    net = EfficientnetClassifier(model_arch='tf_efficientnet_b4_ns', pretrained=False)
    state = torch.load(f'{WGT_PATH}/{ckpt_name}', map_location='cpu')['model']
    net.load_state_dict(state, strict=True)
    net.to(device)
    efficient_b4_nets.append(net)
print('load checkpoint ok! efficient_b4_nets', len(efficient_b4_nets))

# Drop the temporaries; the models stay alive through the lists above.
del net, state

load checkpoint ok! efficient_b3_nets 5
load checkpoint ok! efficient_b4_nets 5


In [11]:
class TestDataset(Dataset):
    """Label-free image dataset driven by the ``image_id`` column of a DataFrame.

    Args:
        df: DataFrame with an ``image_id`` column of image file names.
        PATH: Directory that contains the image files.
        transforms: Optional callable invoked as ``transforms(image=...)`` whose
            result exposes the transformed image under the ``'image'`` key.
    """

    def __init__(self, df, PATH=TEST_PATH, transforms=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.transforms = transforms
        self.image_path = PATH
        
    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx``, convert BGR to RGB and apply the transforms, if any.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_file = f"{self.image_path}/{self.file_names[idx]}"
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread reports an unreadable file by returning None, not by
            # raising; fail here with the path instead of inside cvtColor.
            raise FileNotFoundError(f"could not read image: {image_file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms:
            augmented = self.transforms(image=image)
            image = augmented['image']
        return image

```python
%%time
for i in range(100):
    temp_img = torch.rand(16,3,512,512)
    tf_img = F.interpolate(temp_img, size=[384,384])
```

In [12]:
# Deterministic preprocessing: resize to SIZE, normalise with the mean/std
# below, then convert to a tensor.
test_transforms = Compose([
    Resize(height=SIZE[0], width=SIZE[1]),
    Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=255.0,
        p=1.0,
    ),
    ToTensorV2(p=1.0),
])

# DEBUG runs read the (truncated) train set; normal runs read the test set.
loader_df, loader_path = (train_df, TRAIN_PATH) if DEBUG == True else (test_df, TEST_PATH)
dataset = TestDataset(df=loader_df, PATH=loader_path, transforms=test_transforms)

# Order must be preserved (shuffle=False) and no sample dropped, because
# predictions are later matched to image ids by position.
test_loader = DataLoader(
    dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=NUM_WORKERS,
    pin_memory=False,
)

In [13]:
def eval_function(model_list, image):
    """Average softmax predictions over every model and five views of ``image``.

    Each model is switched to eval mode and run on the batch as given, with
    dim 2 flipped, with dim 3 flipped, with both flipped, and with dims 2 and 3
    swapped. The softmax (last dim) of every run is stacked and averaged.

    Args:
        model_list: Iterable of modules to evaluate.
        image: 4-D input batch; presumably (batch, channel, height, width) --
            confirm against the caller.

    Returns:
        The mean of all ``len(model_list) * 5`` softmax outputs.
    """
    # One callable per view, applied lazily so views are produced one at a time.
    view_fns = (
        lambda batch: batch,
        lambda batch: torch.flip(batch, dims=(2,)).contiguous(),
        lambda batch: torch.flip(batch, dims=(3,)).contiguous(),
        lambda batch: torch.flip(batch, dims=(2, 3)).contiguous(),
        lambda batch: batch.permute(0, 1, 3, 2).contiguous(),
    )

    view_probs = []
    for net in model_list:
        net.eval()
        view_probs.extend(F.softmax(net(make_view(image)), -1) for make_view in view_fns)

    return torch.stack(view_probs).mean(0)

In [14]:
# start here! ------------------
def _cm_weighted(cm, nets, batch):
    """Return ``cm`` matrix-multiplied with the transposed ``eval_function`` output."""
    return torch.matmul(cm, eval_function(nets, batch).transpose(0, 1))


probability = []

start_timer = timer()
with torch.no_grad():
    for batch in test_loader:
        image = batch.to(device)

        # Models evaluated on the batch at the size the loader produced.
        resnext50_v1_preds = _cm_weighted(resnext50_v1_cm, resnext50_v1_nets, image)
        resnext50_v2_preds = _cm_weighted(resnext50_v2_cm, resnext50_v2_nets, image)
        efficient_b3_preds = _cm_weighted(eff_b3_cm, efficient_b3_nets, image)
        efficient_b4_preds = _cm_weighted(eff_b4_cm, efficient_b4_nets, image)

        # The ViT models are fed a 384x384 resampled batch.
        image = F.interpolate(image, size=[384, 384])
        vit_preds = _cm_weighted(vit_cm, vit_nets, image)

        # Sum the per-model scores; only the argmax is used afterwards.
        model_avg_preds = resnext50_v1_preds + resnext50_v2_preds + vit_preds + efficient_b3_preds + efficient_b4_preds
        # Transpose back so rows index the samples of the batch.
        probability.append(model_avg_preds.detach().cpu().numpy().T)

probability = np.concatenate(probability)
print(probability.shape)
predict = probability.argmax(1)

elapsed = timer() - start_timer
print('estimated time for 15,000 test images = %s' % time_to_str(elapsed / len(probability) * 15000, 'min'))

# The submission is written from whichever frame fed the loader.
debug_run = DEBUG == True
submit_source_df = train_df if debug_run else test_df
image_id = submit_source_df['image_id'].values
df_submit = pd.DataFrame({'image_id': image_id, 'label': predict})
df_submit.to_csv(OUTPUT_DIR+'submission.csv', index=False)

if debug_run:
    # Labels are available for the train subset, so report accuracy as well.
    label = train_df['label'].values
    correct = (predict == label).mean()
    print('correct', correct)
    print('probability\n', probability[:5])
    print('predict\n', predict[:10])
else:
    print('Inference completed')

(1, 5)
estimated time for 15,000 test images = 17 hr 06 min
Inference completed
