# This is my implementation of EfficientNetV2-M.

This notebook is inspired by the paper "EfficientNetV2: Smaller Models and Faster Training" (Tan & Le, 2021): https://arxiv.org/pdf/2104.00298.pdf


In [None]:
import numpy as np 
import pandas as pd
from PIL import Image
import os
import time
import copy

import torch
from torch.optim import lr_scheduler
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning.metrics import Metric


In [None]:
import sys

sys.path.append('../input/torchcontrib/contrib-master/')
import torchcontrib
from torchcontrib.optim import SWA
from torch.optim.swa_utils import AveragedModel, SWALR

In [None]:
package_paths = ['../input/timm-pytorch-image-models/pytorch-image-models-master',]
import sys;
for pth in package_paths:
    sys.path.append(pth)
# load the external python package
import timm

Since the model is pretrained on ImageNet-21k and fine-tuned on ILSVRC2012 (ImageNet-1k), I apply only dropout, stochastic depth and data augmentation as regularization. Weight decay is disabled.


In [None]:
"""
Initialize parameters
"""
class CFG:
    """Central configuration: data paths, label maps and training/tuning hyper-parameters.

    All values are class attributes and are read as ``CFG.<NAME>`` throughout
    the notebook; the class is never instantiated here.
    """
    # directories
    TRAIN_CSV_PATH = '../input/plant-pathology-2021-fgvc8/train.csv'
    TRAIN_DIR = '../input/plant-pathology-resized/train_640'  # pre-resized images, used to train faster
    TEST_DIR = '../input/plant-pathology-2021-fgvc8/test_images'
    PRETRAINED_DIR = '../input/efficientnetv2/Efficient_net18'  # path handed to torch.load for the pretrained weights
    MODELDIR = '../output/EfficientNet'  # path handed to torch.save for the best model
    SWADIR = '../output/SWA'  # path handed to torch.save for the SWA model
    # data info
    # class index -> disease name ('healthy' has no index: it is the "no label set" case)
    label_num2str = {0: 'powdery_mildew',
                     1: 'scab',
                     2: 'complex',
                     3: 'frog_eye_leaf_spot',
                     4: 'rust'}
    
    # disease name -> class index (inverse of label_num2str)
    label_str2num = {'powdery_mildew': 0,
                     'scab': 1,
                     'complex': 2,
                     'frog_eye_leaf_spot': 3,
                     'rust': 4}
    valnum=3700  # number of rows taken from the END of the dataframe for validation
    trainnum=14800  # number of rows taken from the START of the dataframe for training
    num_classes = 5
    
    # parameters for training
    TRAIN = False  # run the training cell
    THRESHOLD = False # turn on for tuning threshold
    BATCH = 6 # small batches working as a kind of regularization that improves generalization ability
    EPOCHS = 30
    WEIGHT_DECAY = 0.00000  # 0 disables weight decay
    LR = 5e-4
    min_LR = 1e-7  # eta_min of the cosine annealing schedule
    T_MAX = EPOCHS-1  # T_max of the cosine annealing schedule
    IM_SIZE = 640
    DPR = 0.2  # drop path rate, a.k.a. stochastic depth
    DR = 0.5  # dropout rate
    
    # SWA
    use_swa = False
    swa_lr = 5e-5
    swa_start = 18  # first epoch index at which the SWA model is updated
    

    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


    # training hyper-parameters
    fl_alpha = 1.0  # alpha of focal_loss
    fl_gamma = 2.0  # gamma of focal_loss
    cls_weight = [3.6480, 1.0000, 2.1840, 1.5001, 2.2901] # class weights for calculation of loss on train
    # 6 entries: one per class plus one extra slot; multiplied with the
    # (num_classes + 1) mistake counters during threshold tuning
    class_weights = torch.tensor([5.4663, 1.0223, 2.3602, 1.494, 2.5669, 1],  device=DEVICE)# class weights for validation of threshold, where distribution slightly different
    # per-class decision thresholds applied to sigmoid probabilities; mutated in place by the tuning cell
    threshold = torch.tensor([0.3750, 0.4750, 0.3750, 0.4650, 0.4450], device=DEVICE)# calculated on tuning
    

In [None]:
"""
Read data
"""
# Load the training annotations; the cells below read its 'image' and 'labels' columns.
train_df = pd.read_csv(CFG.TRAIN_CSV_PATH) 
train_df

In [None]:
"""
Analyze distribtion to calculate weight of each class
"""
# Count how often each distinct label string occurs.
train_df['labels'].value_counts()

There are three common ways to fight class imbalance:
1. Downsampling - delete some elements to make the distribution equal.
2. Upsampling - add new elements with data augmentation to make the distribution equal.
3. Weighting - use a different weight for each class, so that smaller classes have proportionately more weight.

The best way is the 2nd method, but it requires more computation, so I chose the 3rd. This method may lead to unstable learning steps, because every batch will have a different amount of "virtual" elements.


In [None]:
"""
Add numerical labels for dataframe
"""
# Translate every space-separated label string into a list of class indices.
# 'healthy' has no class index, so a healthy image maps to an empty list.
all_numeric_labels = [
    [CFG.label_str2num[name] for name in label_string.split(" ") if name != 'healthy']
    for label_string in train_df['labels']
]
train_df['numerical labels'] = all_numeric_labels
train_df

In [None]:
"""
Create class to get transformed data
"""
class GetData(Dataset):
    """Dataset that opens an image file and returns it transformed.

    Args:
        Dir: directory that contains the image files.
        FNames: image file names inside ``Dir``.
        Labels: per-image targets aligned with ``FNames``; may be ``None``
            for unlabeled (test) data.
        Transform: callable applied to the opened PIL image.

    Each item is ``(transformed_image, label)`` for training data and
    ``(transformed_image, file_name)`` for test data.
    """
    def __init__(self, Dir, FNames: list, Labels: list, Transform):
        self.dir = Dir
        self.fnames = FNames
        self.transform = Transform
        self.labels = Labels 
    
    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.fnames)

    def __getitem__(self, index):       
        # Force 3 channels so grayscale/RGBA files do not break the
        # 3-channel Normalize used by the transforms in this notebook.
        image = Image.open(os.path.join(self.dir, self.fnames[index])).convert("RGB")
        sample = self.transform(image)

        # Original convention: the directory name decides the mode.
        if "train" in self.dir:
            return sample, self.labels[index]
        if "test" in self.dir:
            return sample, self.fnames[index]

        # Directory name is inconclusive: previously this silently returned
        # None. Fall back to labels when they were supplied, else file names.
        if self.labels is not None:
            return sample, self.labels[index]
        return sample, self.fnames[index]

In [None]:
"""
Create list of image names and labels
"""
all_img_names: list = train_df["image"].values.tolist()
all_img_labels: list = train_df["numerical labels"].values.tolist()

# Multi-hot encode each index list into a float vector of length num_classes
# (all zeros when the index list is empty).
class_ids = np.arange(CFG.num_classes)
all_img_labels_ts = [
    np.isin(class_ids, active_ids).astype(np.float64)
    for active_ids in all_img_labels
]

In [None]:
"""
Create train and validation data 
"""
# Positional split without shuffling: the first `trainnum` rows are used for
# training and the last `valnum` rows for validation.
X_Train, Y_Train = all_img_names[:CFG.trainnum], all_img_labels_ts[:CFG.trainnum]
X_val, Y_val = all_img_names[-CFG.valnum:], all_img_labels_ts[-CFG.valnum:]

In [None]:
"""
Transform training data
"""
# Training-time augmentation: random crop to 80% of IM_SIZE, colour jitter,
# tensor conversion and normalization.
# The crop size is converted to int explicitly: torchvision documents `size`
# as an int or a sequence, and IM_SIZE*0.8 is the float 512.0.
Transform = transforms.Compose(
    [transforms.RandomCrop(int(CFG.IM_SIZE*0.8)),
   # Augmentations that were tried and are currently disabled:
   #transforms.RandomApply([transforms.ColorJitter((0,0.3), (0,0.3), (0,0.3)),transforms.RandomPerspective(distortion_scale=(0.15)),], p=0.3),    
   #transforms.RandomApply([transforms.ColorJitter((0,0.3), (0,0.3), (0,0.3)),transforms.RandomAffine(degrees=15),], p=0.3),
   #transforms.RandomVerticalFlip(p=0.3),   
   #transforms.RandomHorizontalFlip(p=0.3),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
    


In [None]:
# Training dataset and a loader that reshuffles it every epoch.
trainset = GetData(CFG.TRAIN_DIR, X_Train, Y_Train, Transform)
trainloader = DataLoader(trainset, batch_size=CFG.BATCH, shuffle=True)

In [None]:
# Validation preprocessing: deterministic centre crop to the same 80% size
# used for training crops, then normalization (no augmentation).
# The crop size is converted to int explicitly: torchvision documents `size`
# as an int or a sequence, and IM_SIZE*0.8 is the float 512.0.
Transformval = transforms.Compose(
    [transforms.ToTensor(),
    transforms.CenterCrop(int(CFG.IM_SIZE*0.8)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

In [None]:
# Validation images live in the training directory too, so labels are returned;
# the order is kept fixed (no shuffling).
valset = GetData(CFG.TRAIN_DIR, X_val, Y_val, Transformval)
valloader = DataLoader(valset, batch_size=CFG.BATCH, shuffle=False)


In [None]:
# Sanity check: shape of the image tensor of one training batch.
next(iter(trainloader))[0].shape

In [None]:
"""
Define Focal-Loss
"""

class FocalLoss(nn.Module):
    """
    Class-weighted sigmoid focal loss for multi-label targets, used against
    class imbalance.

    Args:
        alpha: constant factor applied to every loss term.
        gamma: focusing exponent; larger values down-weight easy examples.

    Per-class weights are taken from ``CFG.cls_weight``.
    """
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        # Kept so existing code reading the attribute keeps working; the
        # log-probabilities below are computed without it.
        self.epsilon = 1e-12
        self.cls_weights = torch.tensor([CFG.cls_weight],dtype=torch.float, requires_grad=False, device=CFG.DEVICE)

    def forward(self, logits, target):
        """
        logits & target should be tensors with shape [batch_size, num_classes]

        Returns:
            (loss, probs): the mean focal loss over all elements and the
            sigmoid probabilities (float32) of ``logits``.
        """
        # Work in float32: under fp16 autocast the former `probs + 1e-12`
        # rounds back to `probs`, so saturated logits produced log(0) = -inf
        # and NaN losses.
        logits = logits.float()
        target = target.to(logits.dtype)

        probs = torch.sigmoid(logits)
        # log(sigmoid(x)) and log(1 - sigmoid(x)) = log(sigmoid(-x)),
        # evaluated in a numerically stable way.
        log_probs = nn.functional.logsigmoid(logits)
        log_one_minus_probs = nn.functional.logsigmoid(-logits)

        # log-probability assigned to the true outcome of each label
        log_pt = target * log_probs + (1.0 - target) * log_one_minus_probs
        pt = torch.exp(log_pt)
        focal_loss = -1.0 * (self.alpha * (1 - pt) ** self.gamma) * log_pt
        focal_loss = focal_loss * self.cls_weights.to(focal_loss.device)
        return torch.mean(focal_loss), probs
        

In [None]:
"""
Define F1 score metric
"""
class MyF1Score(Metric):
    """F1 score over predicted label names, pooled over all samples and labels.

    Logits are passed through a sigmoid and compared against per-class
    thresholds; a sample with no class above threshold is labelled 'healthy'.
    True positives, false positives and false negatives are accumulated as
    metric states.
    """
    def __init__(self, cfg, threshold = CFG.threshold, dist_sync_on_step=False):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.cfg = cfg
        self.threshold = threshold
        # One summed integer counter per confusion category.
        for state_name in ("tp", "fp", "fn"):
            self.add_state(state_name, default=torch.tensor(0), dist_reduce_fx="sum")

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        """Accumulate tp/fp/fn for one batch of logits and multi-hot targets."""
        assert preds.shape == target.shape
        predicted_batch = self.num_to_str(torch.sigmoid(preds))
        expected_batch = self.num_to_str(target)

        hits, false_alarms, misses = 0, 0, 0
        for predicted, expected in zip(predicted_batch, expected_batch):
            matched = sum(1 for name in predicted if name in expected)
            hits += matched
            # every predicted name that did not match is a false positive
            false_alarms += len(predicted) - matched
            misses += sum(1 for name in expected if name not in predicted)

        self.tp += hits
        self.fp += false_alarms
        self.fn += misses

    def compute(self):
        """Return 2*tp / (2*tp + fn + fp) from the accumulated counters."""
        doubled_tp = 2.0 * self.tp
        return doubled_tp / (doubled_tp + self.fn + self.fp)
    
    def num_to_str(self, ts: torch.Tensor) -> list:
        """Threshold each row of ``ts`` and return one list of label names per sample."""
        above_threshold = (ts > self.threshold).detach().cpu().numpy().tolist()
        names_per_sample = []
        for sample_flags in above_threshold:
            names = [self.cfg.label_num2str[cls_idx]
                     for cls_idx, is_on in enumerate(sample_flags) if is_on]
            # no class fired -> the sample counts as 'healthy'
            names_per_sample.append(names or ['healthy'])
        return names_per_sample

In [None]:
"""
Create function that counts correct predictions
"""
def predictions(running_corrects, probs, targets=None):
    """Add the number of exactly-matching samples in a batch to a running count.

    A sample counts as correct only when every thresholded class prediction
    equals its target.

    Args:
        running_corrects: count accumulated so far.
        probs: tensor [batch_size, num_classes] of sigmoid probabilities.
        targets: tensor [batch_size, num_classes] of 0/1 targets. When omitted,
            the notebook-level ``labels`` batch variable is used, which keeps
            existing two-argument calls working.

    Returns:
        The updated count as a Python int.
    """
    if targets is None:
        # Backward compatibility: the loops in this notebook leave the current
        # batch targets in the module-level variable `labels`.
        targets = labels
    predicted = probs > CFG.threshold
    exact_match_rows = (predicted == targets).all(dim=1)
    return running_corrects + int(exact_match_rows.sum())

In [None]:
"""
Initialize model
"""
# Build the backbone without downloading weights; the checkpoint loaded below
# already contains the fine-tuned parameters.
model = timm.create_model('tf_efficientnetv2_m_21ft1k', pretrained=False, drop_path_rate=CFG.DPR, drop_rate= CFG.DR)

# To train only the new head, freeze the backbone before replacing the classifier:
#for param in model.parameters():
#    param.requires_grad = False
# Parameters of newly constructed modules have requires_grad=True by default

# Replace the classification head; the input width is read from the existing
# head instead of being hard-coded.
model.classifier = nn.Linear(model.classifier.in_features, CFG.num_classes, bias=True)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(CFG.PRETRAINED_DIR, map_location=CFG.DEVICE))
model.to(CFG.DEVICE)

# Use the values configured in CFG rather than relying on matching defaults.
criterion = FocalLoss(alpha=CFG.fl_alpha, gamma=CFG.fl_gamma)
optimizer = torch.optim.Adam(model.parameters(), lr=CFG.LR, weight_decay=CFG.WEIGHT_DECAY)
exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max = CFG.T_MAX, eta_min=CFG.min_LR, last_epoch=-1, verbose=True)
scheduler = exp_lr_scheduler
metric = MyF1Score(CFG)

if CFG.use_swa:
    swa_model = AveragedModel(model)
    #swa_model.load_state_dict(torch.load(os.path.join('./model','SWA.pth')))
    swa_model.to(CFG.DEVICE)
    swa_scheduler = SWALR(optimizer, swa_lr=CFG.swa_lr)
    print("SWA initialised")

# Train

During training I applied automatic mixed precision (FP16 autocast with gradient scaling), which increases training speed and decreases memory usage.

In [None]:
if CFG.TRAIN:
    since = time.time()

    best_acc = 0.00
    best_f1 = 0.00
    best_model_wts = copy.deepcopy(model.state_dict())
    scaler = torch.cuda.amp.GradScaler() 
    # Kept as a module-level name because later cells may reference it.
    autocast = torch.cuda.amp.autocast()

    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(CFG.MODELDIR) or '.', exist_ok=True)

    for epoch in range(CFG.EPOCHS):

        print('Epoch {}'.format(epoch))
        print('-' * 10)

        # ---------------- training ----------------
        model = model.train()
        f1 = 0
        f1_running = 0
        running_loss = 0.0
        running_corrects = 0
        iterations = len(trainloader)
        optimizer.zero_grad()
        for m, (images, labels) in enumerate(trainloader, start=1):
            # Images must stay floating point: casting the normalized pixels
            # to int64 (as before) truncated them to a handful of integers.
            images = images.to(CFG.DEVICE, dtype=torch.float32)
            labels = labels.to(CFG.DEVICE, dtype=torch.int64)

            print(f"Training {m}/{iterations}", end="\r")   

            # Runs the forward pass with autocasting.
            # Both context managers are entered; the former `A and autocast`
            # expression only entered autocast and changed the grad mode
            # globally as a side effect.
            with torch.enable_grad(), autocast:
                outputs = model(images) 
                loss, probs = criterion(outputs, labels)
                f1_running += float(metric(outputs, labels))     

            # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
            # Backward passes under autocast are not recommended.
            # Backward ops run in the same dtype autocast chose for corresponding forward ops.
            scaler.scale(loss).backward()

            # scaler.step() first unscales the gradients of the optimizer's assigned params.
            # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
            # otherwise, optimizer.step() is skipped.
            scaler.step(optimizer)

            # Updates the scale for next iteration.
            scaler.update()
            optimizer.zero_grad()

            running_loss += loss.item() * images.size(0)
            # `predictions` compares against the module-level `labels` batch.
            running_corrects = predictions(running_corrects, probs)

        # mean of the per-batch F1 values
        f1= f1_running / iterations    
        epoch_loss = running_loss / len(trainset)
        epoch_acc = running_corrects / len(trainset)
        print('Training Loss: {:.6f} Acc: {:.4f}, F1: {:.4f} '.format(
        epoch_loss, epoch_acc, f1))

        # ---------------- validation ----------------
        model.eval()

        f1 = 0
        f1_running = 0
        running_loss = 0.0
        running_corrects = 0

        iterations = len(valloader)

        for m, (images, labels) in enumerate(valloader, start=1):
            images = images.to(CFG.DEVICE, dtype=torch.float32)
            labels = labels.to(CFG.DEVICE, dtype=torch.int64)

            print(f"Validating {m}/{iterations}", end="\r")   

            # no_grad is scoped to this block, so gradient tracking is
            # restored afterwards instead of staying disabled globally.
            with torch.no_grad(), autocast:
                outputs = model(images)
                loss, probs = criterion(outputs, labels)
                f1_running += float(metric(outputs, labels))

            # statistics
            running_loss += loss.item() * images.size(0)
            running_corrects = predictions(running_corrects, probs)

        f1= f1_running / iterations
        epoch_loss = running_loss / len(valset)
        epoch_acc = running_corrects / len(valset)
        time_elapsed = time.time() - since
        print('Validation Loss: {:.6f} Acc: {:.4f} F1:{:.4f}'.format(
        epoch_loss, epoch_acc, f1))
        print('Training of epoch {} completed in {:.0f}m {:.0f}s'.format(
                epoch, time_elapsed // 60, time_elapsed % 60))             
        #save best model
        if f1 > best_f1:
            best_f1 = f1
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), CFG.MODELDIR)

        # From swa_start on, average the weights and follow the SWA schedule;
        # before that, follow the cosine schedule.
        if epoch >= CFG.swa_start and CFG.use_swa:
            swa_model.update_parameters(model)
            swa_scheduler.step()
            print("SWA model update")
        else:
            scheduler.step()

        print()


    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best validation F1: {:.4f}'.format(best_f1))

    # load best model weights
    model.load_state_dict(best_model_wts)  


After training the SWA model, you should run a forward pass over the training data to update the statistics of the BatchNorm layers.


In [None]:
"""
SWA BatchNorm updating
"""
if CFG.use_swa:
    since = time.time()

    f1 = 0
    f1_running = 0

    # train() mode so the BatchNorm layers refresh their running statistics
    # during the forward passes below; no weights are updated.
    swa_model.train()
    running_loss = 0.0
    running_corrects = 0
    iterations = len(trainloader)
    for m, (images, labels) in enumerate(trainloader, start=1):
        # Keep images floating point: an int64 cast truncates normalized pixels.
        images = images.to(CFG.DEVICE, dtype=torch.float32)
        labels = labels.to(CFG.DEVICE, dtype=torch.int64)

        print(f"Upgrading {m}/{iterations}", end="\r")   

        # Both contexts are entered and are local to this cell, so it neither
        # depends on the `autocast` object created in the training cell nor
        # leaves gradient tracking disabled afterwards.
        with torch.no_grad(), torch.cuda.amp.autocast():
            outputs = swa_model(images) 
            loss, probs = criterion(outputs, labels)
            f1_running += float(metric(outputs, labels))

        # statistics
        running_loss += loss.item() * images.size(0)
        # `predictions` compares against the module-level `labels` batch.
        running_corrects = predictions(running_corrects, probs)

    f1_SWA= f1_running / iterations
    epoch_loss = running_loss / len(trainset)
    epoch_acc = running_corrects / len(trainset)
    time_elapsed = time.time() - since
    print('Loss: {:.6f} Acc: {:.4f} F1:{:.4f}'.format(
    epoch_loss, epoch_acc, f1_SWA))
    print('Updating BN of SWA model completed in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60)) 


In [None]:
"""
Save SWA model
"""
if CFG.use_swa:
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(CFG.SWADIR) or '.', exist_ok=True)
    torch.save(swa_model.state_dict(), CFG.SWADIR)

In [None]:
"""
SWA validation
"""
if CFG.use_swa:
    since = time.time()

    f1 = 0
    f1_running = 0

    # evaluation pass of the averaged model on the validation split
    swa_model.eval()
    running_loss = 0.0
    running_corrects = 0
    iterations = len(valloader)
    for m, (images, labels) in enumerate(valloader, start=1):
        # Keep images floating point: an int64 cast truncates normalized pixels.
        images = images.to(CFG.DEVICE, dtype=torch.float32)
        labels = labels.to(CFG.DEVICE, dtype=torch.int64)

        print(f"Validating {m}/{iterations}", end="\r")   

        # Both contexts are entered and are local to this cell, so it neither
        # depends on the `autocast` object created in the training cell nor
        # leaves gradient tracking disabled afterwards.
        with torch.no_grad(), torch.cuda.amp.autocast():
            outputs = swa_model(images) 
            loss, probs = criterion(outputs, labels)
            f1_running += float(metric(outputs, labels))

        # statistics
        running_loss += loss.item() * images.size(0)
        # `predictions` compares against the module-level `labels` batch.
        running_corrects = predictions(running_corrects, probs)

    # mean of the per-batch F1 values
    f1_SWA= f1_running / iterations
    epoch_loss = running_loss / len(valset)
    epoch_acc = running_corrects / len(valset)
    time_elapsed = time.time() - since
    print('Loss: {:.6f} Acc: {:.4f} F1:{:.4f}'.format(
    epoch_loss, epoch_acc, f1_SWA))
    print('Validation of SWA model completed in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60)) 

# Threshold tuning

In this chapter I minimize the mean number of weighted mistakes over classes, where a weighted mistake equals the number of mistakes for a class multiplied by the weight of that class.

In [None]:
"""
Redifine prediction function
"""
def predictions(running_corrects, probs,count,mistakes, targets=None):
    """Count exact matches and tally per-class misses for one batch.

    Args:
        running_corrects: number of exactly-correct samples so far.
        probs: tensor [batch_size, num_classes] of sigmoid probabilities.
        count: tensor with ``num_classes + 1`` counters, updated in place.
            Slot ``i`` counts positives of class ``i`` whose probability fell
            below the class threshold; the last slot counts wrong samples
            that had no such missed positive.
        mistakes: tensor with ``num_classes + 1`` entries, updated in place;
            slot ``i`` sums the probabilities of the missed positives of class ``i``.
        targets: tensor [batch_size, num_classes] of 0/1 targets. When omitted,
            the notebook-level ``labels`` batch variable is used, which keeps
            existing four-argument calls working.

    Returns:
        (running_corrects, count, mistakes)
    """
    if targets is None:
        # Backward compatibility: the loops in this notebook leave the current
        # batch targets in the module-level variable `labels`.
        targets = labels
    predicted = probs > CFG.threshold
    # Index of the extra counter; derived from the config instead of the literal 5.
    extra_slot = CFG.num_classes
    for row in range(len(predicted)):
        if bool((predicted[row] == targets[row]).all()):
            running_corrects += 1
            continue

        missed_positive = False
        for cls_idx in range(CFG.num_classes):
            if targets[row][cls_idx] == 1 and float(probs[row][cls_idx]) < CFG.threshold[cls_idx]:
                count[cls_idx] += 1
                mistakes[cls_idx] = torch.add(mistakes[cls_idx], probs[row][cls_idx])
                missed_positive = True

        if not missed_positive:
            count[extra_slot] += 1

    return running_corrects, count, mistakes

In [None]:
"""
Choosing right threshold
"""
if CFG.THRESHOLD:
    STEP = 0.005
    ITERATIONS = 25

    since = time.time()
    # Best (lowest) mean weighted mistake count seen so far. It is shared
    # across classes, so each class is tuned against the thresholds already
    # chosen for the previous ones. (The comparison below previously read a
    # name that was never initialised.)
    count_weightedbest = float('inf')
    f1best = None
    num_batches = len(valloader)

    model.eval()

    for num_cl in range(CFG.num_classes):

        # Fall back to the starting threshold if no candidate improves the
        # score, rather than to 0.5, which is never evaluated here.
        best_threshold = float(CFG.threshold[num_cl])

        for iteration in range(ITERATIONS):
            # The threshold is lowered before evaluating, so the starting
            # value itself is not evaluated again.
            CFG.threshold[num_cl] -= STEP
            metric = MyF1Score(CFG)
            print(" ")
            print("-"*10)
            print("Iteration: ",iteration, "for class: ", num_cl)
            print('Threshold = ', CFG.threshold)

            running_loss = 0.0
            running_corrects = 0
            f1_running = 0

            count=torch.zeros(CFG.num_classes + 1, device=CFG.DEVICE )
            mistakes = torch.zeros(CFG.num_classes + 1, device=CFG.DEVICE )

            for m, (images, labels) in enumerate(valloader, start=1):
                # Keep images floating point: an int64 cast truncates normalized pixels.
                images = images.to(CFG.DEVICE, dtype=torch.float32)
                labels = labels.to(CFG.DEVICE, dtype=torch.int64)       

                print(f"Validating {m}/{num_batches}", end="\r")   

                # Forward pass only. Both contexts are entered and are local
                # to this cell, so it works when the training cell was skipped.
                with torch.no_grad(), torch.cuda.amp.autocast():
                    outputs = model(images)
                    loss, probs = criterion(outputs, labels)
                    f1_running += float(metric(outputs, labels))     

                # statistics
                running_loss += loss.item() * images.size(0)
                # `predictions` compares against the module-level `labels` batch.
                running_corrects, count, mistakes = predictions(running_corrects, probs,count,mistakes)

            # mean of the per-batch F1 values
            f1 = f1_running / num_batches

            count_weighted = count * CFG.class_weights # vector of weighted amount of mistakes
            mean_weighted = float(count_weighted.mean())
            print('count_weighted:', count_weighted)
            print('count_weighted.mean: ', count_weighted.mean())   

            #print('mistakes: ', mistakes/a) # Shows mean prediction of wrongly rejected class
            #print('count:', count) #Vector with each element as amount of mistakes on particular class(last class is healthy)

            if mean_weighted <= count_weightedbest:
                count_weightedbest = mean_weighted
                best_threshold = float(CFG.threshold[num_cl])
                f1best = f1
                print("Best threshold added with weighted mean mistakes: ", float(count_weightedbest))

            epoch_loss = running_loss / len(valset)
            epoch_acc = running_corrects / len(valset)
            time_elapsed = time.time() - since
            print('Validation Loss: {:.6f} Acc: {:.4f} F1:{:.4f}'.format(
            epoch_loss, epoch_acc, f1))
            print('Validation of threshold completed in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60)) 
        # Freeze this class at its best value before tuning the next class.
        CFG.threshold[num_cl] = best_threshold
        print("Best threshold for class{} is:{:.3f} with a_weighted.mean: {}".format(num_cl,best_threshold,float(count_weightedbest)))

    print("Best weighted threshold is: ", CFG.threshold) 
    print("F1: ", f1best)


# Make a prediction


At inference I used images 25% larger than during training (inputs resized to 640 px versus 512 px training crops).

In [None]:
# Test-time preprocessing: resize the whole image to IM_SIZE x IM_SIZE (no crop),
# then apply the same normalization statistics as for training/validation.
Transformtest = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Resize((CFG.IM_SIZE,CFG.IM_SIZE)),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

In [None]:
# File names of all test images, in directory listing order.
X_Test = os.listdir(CFG.TEST_DIR)

In [None]:
# No labels exist for the test set, so each item carries its file name instead.
# Batch size is 1: one image per step.
testset = GetData(CFG.TEST_DIR, X_Test, None, Transformtest)
testloader = DataLoader(testset, batch_size=1, shuffle=False)

In [None]:
def convert_num_to_str(pred: np.ndarray) -> str:
    """Turn per-class boolean flags into the space-separated label string.

    Entry ``i`` of ``pred`` selects ``CFG.label_num2str[i]``; when no flag is
    set the result is 'healthy'.
    """
    selected_names = [CFG.label_num2str[cls_idx]
                      for cls_idx, is_on in enumerate(pred) if is_on]
    return ' '.join(selected_names) if selected_names else 'healthy'

In [None]:
# Submission skeleton: one row per test image; 'labels' is filled in by the inference cell.
submit_df = pd.DataFrame(columns=['image', 'labels'])
submit_df['image'] = X_Test
submit_df

In [None]:
"""
Use SWA if it's available
"""
if CFG.use_swa:
    # Compare against the best validation F1 recorded during training. The
    # generic `f1` variable is reset to 0 by the SWA cells, so comparing with
    # it made the SWA model win whenever its F1 was positive.
    # Without a training run there is no baseline, so 0.0 is used.
    baseline_f1 = best_f1 if CFG.TRAIN else 0.0
    if f1_SWA > baseline_f1:
        model = swa_model

In [None]:
model.eval()

# image file name -> predicted label string
predicted_labels = {}
with torch.no_grad():
    for img_ts, img_names in testloader:
        # Use the configured device instead of a hard-coded .cuda().
        img_ts = img_ts.to(CFG.DEVICE)
        batch_probs = torch.sigmoid(model(img_ts))
        batch_flags = (batch_probs > CFG.threshold).tolist()

        # Pair every image of the batch with its own name. The previous code
        # always read name index 0, which is only correct for batch size 1.
        for img_name, flags in zip(img_names, batch_flags):
            # convert numerical label into string
            predicted_labels[img_name] = convert_num_to_str(flags)

# Fill the submission in one pass instead of searching the frame per image.
submit_df['labels'] = submit_df['image'].map(predicted_labels)

In [None]:
"""
Save prediction
"""
# Write the submission file without the dataframe index column.
submit_df.to_csv("./submission.csv", index=False)

In [None]:
# Display the final submission table.
submit_df