### Model developed for Inference on Cassava Competition

In [1]:
import sys

# Make the EfficientNet package shipped as a Kaggle input dataset importable
# by appending its directory to the module search path.
effnet_path = '../input/efficientnet-pytorch/'
sys.path.append(effnet_path)

In [2]:
import os
import time

import torch
import torch.nn as nn
import torchvision
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import efficientnet_pytorch

import albumentations as A
from albumentations.pytorch import ToTensorV2

In [3]:
# File holding the model parameters produced by training.
MODEL_FILE = '/kaggle/input/mycassava1/cassava1-run8.pt'

# Directory with the images to classify.
TEST_DIR = '/kaggle/input/cassava-leaf-disease-classification/test_images'

# os.listdir returns entries in arbitrary order; sorting makes the order of
# the batches (and of the rows in the submission) reproducible across runs.
test_files = sorted(os.listdir(TEST_DIR))

# number of test images
len(test_files)

1

In [4]:
# SAMPLE_SUB = '/kaggle/input/cassava-leaf-disease-classification/sample_submission.csv'

# sample_sub = pd.read_csv(SAMPLE_SUB)

In [5]:
# sample_sub.head()

In [6]:
# Image transformations applied at inference time (settings used for run8).
SIZE = 500

# The random flips and rotation (each applied with probability 0.5) are
# included for test-time augmentation: every pass over the same image can
# produce a different augmented view.
albumentations_transform_valid = A.Compose([
    A.Resize(height=SIZE, width=SIZE),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

class CassavaTestDataset(Dataset):
    """Dataset yielding ``(file_name, image)`` pairs for unlabeled test images.

    Args:
        files: sequence of image file names, relative to the image directory.
        transforms: optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``'image'``. When ``None`` the RGB
            array is returned exactly as loaded.
        root: directory containing the images. When ``None`` (the default)
            the module-level ``TEST_DIR`` is looked up at access time, which
            is what the dataset did before this parameter existed.

    Raises:
        FileNotFoundError: from ``__getitem__`` when the image cannot be read.
    """

    def __init__(self, files, transforms=None, root=None):
        self.items = files
        self.length = len(files)
        self.transforms = transforms
        self.root = root

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transforms, return ``(name, image)``."""
        f_name = self.items[idx]
        root = TEST_DIR if self.root is None else self.root
        img_path = os.path.join(root, f_name)

        # cv2.imread does not raise on a missing/corrupt file, it returns None;
        # fail here with the offending path instead of deep inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Cannot read image: {img_path}')

        # OpenCV loads images as BGR; convert to RGB before transforming.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            img = self.transforms(image=img)['image']

        return f_name, img

    def __len__(self):
        """Number of files in the dataset."""
        return self.length

In [7]:
# Wrap the test files in a dataset that applies the TTA transformations.
test_ds = CassavaTestDataset(
    files=test_files,
    transforms=albumentations_transform_valid,
)

# Sanity check: fetch the first item and show its name and tensor shape.
f_name, img = test_ds[0]

f_name, img.shape

('2216849948.jpg', torch.Size([3, 500, 500]))

In [8]:
# Batch size lowered to work with SIZE=500.
BATCH_SIZE = 10

# shuffle=False keeps the images in dataset order on every pass.
test_dl = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
# Number of output classes of the classifier.
N_CLASS = 5

# Build the EfficientNet-B2 architecture by name; the trained parameters are
# loaded from MODEL_FILE in a later cell.
model = efficientnet_pytorch.EfficientNet.from_name('efficientnet-b2')

# Disabled layer-freezing snippet, kept for reference:
# for name, param in model.named_parameters():
#    if ("bn" not in name):
#        param.requires_grad = False

# Replace the stock final layer with a two-layer head ending in N_CLASS outputs.
num_ftrs = model._fc.in_features

model._fc = nn.Sequential(
    nn.Linear(in_features=num_ftrs, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(in_features=512, out_features=N_CLASS),
)

In [10]:
# Load the trained parameters into the model and switch it to inference mode.
model.cuda()
# map_location='cpu' makes deserialization independent of the device the
# checkpoint was saved from; load_state_dict then copies the values into the
# model's parameters, which already live on the GPU.
model.load_state_dict(torch.load(MODEL_FILE, map_location='cpu'))
model.eval()
# model

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=

In [11]:
# Number of test-time-augmentation passes over the whole test set.
NUM_TTA = 5

tStart = time.time()

# Accumulator for the TTA passes: one row per test image, one column per class.
final_predictions = np.zeros((len(test_ds), N_CLASS))

# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    for k in range(NUM_TTA):
        print('TTA iteration n.', k)

        flat_names = []
        predictions = []
        for fnames, imgs in test_dl:
            imgs = imgs.cuda()

            # raw outputs of the model's final linear layer (no softmax applied)
            probs = model(imgs)

            # move the outputs off the GPU and collect one row per image
            flat_names.extend(fnames)
            predictions.extend(probs.cpu().numpy())

        predictions_arr = np.array(predictions)

        # elapsed time since the start of the whole TTA run (cumulative)
        tEla = round((time.time() - tStart), 1)

        print('Total prediction time: ', tEla)

        # accumulate for TTA
        final_predictions += predictions_arr

# Average the accumulated outputs over the TTA passes.
final_predictions = final_predictions/NUM_TTA

TTA iteration n. 0
Total prediction time:  0.7
TTA iteration n. 1
Total prediction time:  0.7
TTA iteration n. 2
Total prediction time:  0.8
TTA iteration n. 3
Total prediction time:  0.8
TTA iteration n. 4
Total prediction time:  0.8


In [12]:
# Convert the averaged per-class scores into labels: for each image take the
# index of the class with the highest score.
final_labels = final_predictions.argmax(axis=1)

final_labels

array([4])

In [13]:
# Build the submission table column by column. Constructing it from a dict
# (rather than a list of lists followed by a transpose) lets each column keep
# its own dtype, so 'label' stays integer instead of becoming object.
submission_df = pd.DataFrame({'image_id': flat_names, 'label': final_labels})

submission_df.head()

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [14]:
# Show (rows, columns) of the submission table as a quick check.
submission_df.shape

(1, 2)

In [15]:
# Write the submission file (without the DataFrame index column).
submission_df.to_csv('submission.csv', index=False)