In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

In [None]:
# Read train.csv and shuffle every row with a fixed seed, so the positional
# train/validation split performed later is reproducible.
train_df = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
train_df = train_df.sample(frac=1, random_state=666)

# Prefix each image file name with the directory of the downscaled image set.
train_df['path'] = [
    '../input/plant2021-downscaled-images-dataset/' + file_name
    for file_name in train_df['image']
]
train_df.head(5)

In [None]:
# Replace every disease name in the space-separated `labels` text with its
# class id (still as text), then split each row into a list of id strings.
name_to_id = {
    'scab': '1',
    'rust': '2',
    'healthy': '0',
    'frog_eye_leaf_spot': '3',
    'complex': '4',
    'powdery_mildew': '5',
}
encoded = train_df['labels']
for disease_name, class_id in name_to_id.items():
    encoded = encoded.str.replace(disease_name, class_id)
train_df['label_id'] = encoded.str.split(" ")

In [None]:
# Turn each row's list of id strings into a list of ints.
train_df['label_id'] = train_df['label_id'].map(lambda id_tokens: list(map(int, id_tokens)))

In [None]:
from random import sample

In [None]:
# Positional hold-out split of the already shuffled frame: rows from 17000
# onward become the validation set, the first 17000 rows stay as training data.
split_at = 17000
train_df, valid_df = train_df.iloc[:split_at, :], train_df.iloc[split_at:, :]

In [None]:
# Class id -> disease name; list position is the class id.
label_dic = dict(enumerate([
    'healthy',
    'scab',
    'rust',
    'frog_eye_leaf_spot',
    'complex',
    'powdery_mildew',
]))
print(label_dic)
classes = len(label_dic)  # six output classes

# The raw text labels are no longer needed on the training frame.
del train_df['labels']

# Numpy copies of the label lists and image paths, training split first ...
image_labels = np.array(train_df['label_id'].to_numpy())
image_list = np.array(train_df['path'].to_numpy())

# ... then the validation split.
image_labels_v = np.array(valid_df['label_id'].to_numpy())
image_list_v = np.array(valid_df['path'].to_numpy())

print(image_list.shape)

In [None]:
# For each class id, divide the reference count 5193 by the number of training
# samples whose label list contains that id.
# NOTE(review): 5193 looks like the size of the most frequent class — confirm.
[5193 / sum(1 for sample_ids in image_labels if class_id in sample_ids) for class_id in range(6)]

In [None]:
# Per-class loss weights, one entry per class id 0..5 (consumed by FocalLoss).
# NOTE(review): these look like rounded versions of the ratios computed in the
# previous cell — confirm they still match the current split.
cls_weight = [1.24, 1.0001, 2.72, 1.31, 2.61, 4.4]

In [None]:
# Install the libturbojpeg .deb package and the PyTurboJPEG Python package
# from the files bundled under ../input/pyturbojpeg (no network needed).
!apt install ../input/pyturbojpeg/libturbojpeg_1.4.2-0ubuntu3.4_amd64.deb
!pip install ../input/pyturbojpeg/PyTurboJPEG-1.4.1

In [None]:
import matplotlib.pyplot as plt
import albumentations as A
import cv2, torch
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from turbojpeg import TurboJPEG

# Use the GPU when one is available. `is_available` has to be *called*: the bare
# function object is always truthy and would select 'cuda' even on CPU-only hosts.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#######################################

from albumentations.pytorch import ToTensor

def get_training_augmentation():
    """Build the training-time augmentation callable.

    The pipeline applies, in order:
      1. one of two resize-then-random-crop variants (shorter side scaled to
         224 or to 400, followed by a random 224x224 crop);
      2. with probability 0.4, one colour / blur / noise transform;
      3. with probability 0.3, one geometric transform;
      4. normalisation with the given per-channel mean / std;
      5. conversion to a tensor.

    Returns:
        A callable taking an image (anything ``np.array`` accepts) and
        returning the transformed ``'image'`` entry of the pipeline output.
    """
    def _resize_then_crop(shorter_side):
        # Scale so the shorter side equals `shorter_side`, then crop 224x224 at random.
        return A.Compose(
            [
                A.SmallestMaxSize(shorter_side),
                A.RandomCrop(224, 224),
            ],
            p=1,
        )

    scale_and_crop = A.OneOf(
        [_resize_then_crop(224), _resize_then_crop(400)],
        p=1,
    )

    # NOTE(review): the brightness, gamma and blur limits below are unusually
    # strong — confirm they are intentional.
    colour_or_noise = A.OneOf(
        [
            A.HueSaturationValue(),
            A.RandomBrightness(limit=1.58),
            A.RandomGamma(gamma_limit=(500, 1500)),
            A.RandomContrast(),
            A.Blur(blur_limit=30),
            A.GaussNoise(),
        ],
        p=0.4,
    )

    geometric = A.OneOf(
        [
            A.Transpose(p=0.5),
            A.Rotate(limit=360),
            A.Flip(p=0.5),
        ],
        p=0.3,
    )

    normalise = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    augmentation_pipeline = A.Compose(
        [scale_and_crop, colour_or_noise, geometric, normalise, ToTensor()],
        p=1,
    )

    def _apply(img):
        return augmentation_pipeline(image=np.array(img))['image']

    return _apply



def transform_valid():
    """Build the deterministic preprocessing callable for validation and test images.

    The pipeline scales the shorter image side to 224, takes the central
    224x224 crop, normalises with the given per-channel mean / std and
    converts the result to a tensor.

    A centre crop is used instead of a random crop so that evaluating the same
    image twice yields the same input; with a random crop the validation
    metrics (and therefore best-model selection) and the test predictions
    would change from run to run.

    Returns:
        A callable taking an image (anything ``np.array`` accepts) and
        returning the transformed ``'image'`` entry of the pipeline output.
    """
    augmentation_pipeline = A.Compose(
        [
            A.SmallestMaxSize(224),
            A.CenterCrop(224, 224),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
                ),
            ToTensor()
        ],
        p = 1
    )
    return lambda img:augmentation_pipeline(image=np.array(img))['image']

######################################

# One shared decoder instance, reused for every image.
jpeg_reader = TurboJPEG()


def read_img(img):
    """Read the JPEG file at path `img` and return the decoded image.

    The file is read fully into memory and decoded with pixel-format code 0
    (NOTE(review): presumably TJPF_RGB in PyTurboJPEG — confirm against the
    installed version).
    """
    with open(img, "rb") as jpeg_file:
        raw_bytes = jpeg_file.read()
    return jpeg_reader.decode(raw_bytes, 0)
    

class dataset(Dataset) :
    """Map-style dataset yielding (image tensor, multi-hot label vector) pairs.

    Args:
        image_list: indexable collection of image file paths.
        image_labels: indexable collection where entry ``i`` is the list of
            integer class ids of image ``i``.
        transform: callable applied to the decoded image; its result must
            support ``.to(device)``.
        device: device that both returned tensors are moved to. This argument
            used to be ignored in favour of a module-level global; it is now
            stored and honoured.
        num_classes: width of the multi-hot label vector (default 6, the
            value that was previously hard-coded).
    """
    def __init__(self, image_list, image_labels, transform, device, num_classes=6) :
        self.image_list = image_list
        self.image_labels = image_labels
        self.transform = transform
        self.device = device
        self.num_classes = num_classes

    def __len__(self) :
        """Number of images in the dataset."""
        return len(self.image_list)

    def __getitem__(self, index) :
        """Decode, transform and return sample `index` together with its label vector."""
        x = read_img(self.image_list[index])
        x = self.transform(x).to(self.device)

        y = self.image_labels[index]
        # One-hot encode every class id of the sample and sum the rows, giving
        # a single multi-hot vector of length `num_classes`.
        y = torch.nn.functional.one_hot(torch.tensor(y), self.num_classes).sum(0).to(self.device)

        return x, y


# Both loaders use the same batching options.
loader_options = dict(batch_size=32, shuffle=True)

# Training split: augmented samples.
train_set = dataset(image_list, image_labels, get_training_augmentation(), device)
print(len(train_set))
train_data = DataLoader(train_set, **loader_options)

# Validation split: validation preprocessing only.
valid_set = dataset(image_list_v, image_labels_v, transform_valid(), device)
print(len(valid_set))
valid_data = DataLoader(valid_set, **loader_options)
########

In [None]:
# Phase name -> DataLoader, as looked up by the training loop.
dataloaders = {
    'train': train_data ,
    'val': valid_data
}

# Derive the sample counts from the loaders' underlying datasets instead of
# hard-coding them, so the per-epoch averages stay correct if the CSV or the
# split point changes.
dataset_sizes = {
    phase: len(loader.dataset) for phase, loader in dataloaders.items()
}

In [None]:
import sys

# Make the EfficientNet-PyTorch source tree bundled under ../input importable,
# since the package is loaded from that directory rather than an installed copy.
sys.path.append("../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master")

from efficientnet_pytorch import model as enet

In [None]:
# Build the EfficientNet-B4 architecture, then fill it with the checkpoint weights.
model = enet.EfficientNet.from_name('efficientnet-b4')

# map_location='cpu' lets the checkpoint load on hosts without a GPU whatever
# device it was saved from; the model is moved to the target device later.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('../input/efficientnet-pytorch/efficientnet-b4-e116e8b3.pth', map_location='cpu')
)

In [None]:
# Display the model's current final layer (it is replaced in a later cell).
model._fc

In [None]:
import torch.nn as nn

class FocalLoss(nn.Module):
    """
    Class-weighted focal loss for multi-label classification, used to fight
    class imbalance.

    Each logit is treated as an independent binary prediction (sigmoid). The
    per-element loss is ``-alpha * (1 - p_t) ** gamma * log(p_t)``, multiplied
    by the weight of its class and averaged over all elements.

    Args:
        alpha: global scaling factor of the loss.
        gamma: focusing exponent; larger values down-weight easy examples more.
        cls_weights: optional sequence of per-class weights. When omitted, the
            notebook-level ``cls_weight`` list is used, as before.
    """
    def __init__(self, alpha=1, gamma=2, cls_weights=None):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = 1e-12  # prevent training from Nan-loss error
        if cls_weights is None:
            cls_weights = cls_weight  # notebook-level default
        # Registered as a buffer (shape [1, num_classes]) so `.to(device)` moves
        # it with the module instead of pinning it to a global device.
        self.register_buffer(
            'cls_weights',
            torch.as_tensor(cls_weights, dtype=torch.float).reshape(1, -1),
        )

    def forward(self, logits, target):
        """
        logits & target should be tensors with shape [batch_size, num_classes]

        Returns the scalar mean of the weighted focal loss.
        """
        probs = torch.sigmoid(logits)
        one_subtract_probs = 1.0 - probs
        # add epsilon so log() never sees an exact zero
        probs_new = probs + self.epsilon
        one_subtract_probs_new = one_subtract_probs + self.epsilon
        # log-probability assigned to the true state of every label
        log_pt = target * torch.log(probs_new) + (1.0 - target) * torch.log(one_subtract_probs_new)
        pt = torch.exp(log_pt)
        focal_loss = -1.0 * (self.alpha * (1 - pt) ** self.gamma) * log_pt
        # `.to` is a no-op when the buffer already lives on the logits' device;
        # it keeps the loss usable even if the module itself was never moved.
        focal_loss = focal_loss * self.cls_weights.to(focal_loss.device)
        return torch.mean(focal_loss)

In [None]:
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim import lr_scheduler

# Replace the classification head with one sized for our classes. The input
# width is read from the existing head rather than hard-coded, so the cell
# keeps working if a different EfficientNet variant is loaded.
model._fc = torch.nn.Linear(in_features=model._fc.in_features, out_features=classes)

model = model.to(device)
criterion = FocalLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001) # lr, SGD

# Multiply the learning rate by 0.1 every 9 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=9, gamma=0.1)

In [None]:
# Display the per-phase sample counts used to average the epoch metrics.
dataset_sizes

In [None]:
def to_lab(preds):
    """Convert raw logits into a multi-hot label matrix.

    A class is switched on when its logit is positive (sigmoid above 0.5). The
    highest-scoring class of each row is always switched on as well, so every
    row gets at least one label.

    Args:
        preds: float tensor of logits with shape [batch_size, num_classes].

    Returns:
        Long tensor of 0/1 values with the same shape as `preds`.
    """
    # Take the class count from the input instead of assuming 6 columns.
    num_classes = preds.shape[1]
    above_threshold = preds > 0
    top_class = torch.nn.functional.one_hot(preds.argmax(1), num_classes).bool()
    return (above_threshold | top_class).long()

In [None]:
import time
import copy

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model`, validate after every epoch and keep the best weights.

    Each epoch runs a 'train' phase and a 'val' phase over the notebook-level
    `dataloaders` dict. Inputs are moved to the notebook-level `device`, and
    logits are turned into multi-hot predictions with `to_lab`. For every
    phase the mean loss, the exact-match accuracy (all labels of a sample
    correct) and the batch-averaged macro F1 are printed.

    Whenever the validation accuracy improves, the weights are remembered and
    also written to 'best_model.pth'.

    Args:
        model: network to train; modified in place.
        criterion: loss called as ``criterion(logits, float_targets)``.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of epochs to run.

    Returns:
        `model`, loaded with the weights that gave the best validation accuracy.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            f1l = 0.0
            # Count the samples actually seen, so the averages do not rely on
            # externally maintained dataset sizes.
            n_seen = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # Gradients only exist (and need clearing) while training.
                if is_train:
                    optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = to_lab(outputs)
                    loss = criterion(outputs, labels.float())

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * batch_size
                # A sample counts as correct only when every label matches.
                running_corrects += (preds == labels).all(dim=1).sum().item()
                # scikit-learn expects (y_true, y_pred) in that order.
                f1l += f1_score(labels.cpu().numpy(), preds.cpu().numpy(),
                                average='macro', zero_division=1) * batch_size
                n_seen += batch_size

            if is_train:
                # Step the schedule once per epoch, after that epoch's optimizer
                # updates. Stepping before training (as was done previously)
                # shifts the decay one epoch early, and the extra
                # optimizer.step() that accompanied it could apply a stale
                # momentum-only update.
                scheduler.step()

            denom = max(n_seen, 1)  # guard against an empty loader
            epoch_loss = running_loss / denom
            epoch_acc = running_corrects / denom
            epoch_f1 = f1l / denom

            print('{} Loss: {:.4f} Acc: {:.4f} F1: {:.4f}'.format(phase, epoch_loss, epoch_acc, epoch_f1))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), 'best_model.pth')

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [None]:
# Run 27 epochs of training; `model` is rebound to the network returned by
# train_model (loaded with the best validation weights).
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=27)

In [None]:
# Builds the validation preprocessing callable and displays it; the result is
# not stored. NOTE(review): looks like a leftover inspection cell — confirm it
# can be removed.
transform_valid()

In [None]:
import os
from glob import glob

valid_image_list = glob('../input/plant-pathology-2021-fgvc8/test_images/*.jpg')

# Build the preprocessing pipeline once instead of once per image.
test_transform = transform_valid()

model.eval()
predict_list = []
image_name_list = []
# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    for image in tqdm(valid_image_list):
        # Use the real file name rather than slicing the path at a fixed
        # offset, which silently breaks if the input directory changes.
        image_name = os.path.basename(image)

        img = read_img(image)
        img = test_transform(img)

        img = img.to(device)
        img = img.reshape(-1, 3, 224, 224)
        predict = model(img)

        # The model is trained as a multi-label classifier, so decode the
        # logits exactly as during training (to_lab) and emit every predicted
        # class, space-separated like the training `labels` column, instead
        # of only the single arg-max class.
        label_ids = to_lab(predict).reshape(-1).nonzero().reshape(-1).tolist()
        predict_list.append(' '.join(label_dic[label_id] for label_id in label_ids))
        image_name_list.append(image_name)

predict_list = np.array(predict_list)
image_name_list = np.array(image_name_list)
print(image_name_list)

submission_df = pd.DataFrame()
submission_df['image'] = image_name_list
submission_df['labels'] = predict_list
submission_df.head()

In [None]:
# Display the shape of the last tensor left in `img` by the inference loop
# (only defined if that loop processed at least one image).
img.shape

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission_df.to_csv("submission.csv", index = False)