In [None]:
import glob
import os.path as osp

import pandas as pdN
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tqdm.notebook import tqdm
from PIL import Image

import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
from torchvision import models, transforms
import cv2
import albumentations as A

# Import Data

In [None]:
import pandas as pd

# Competition metadata: the training table (later read through its `image`
# and `labels` columns) and the sample submission file.
df_train, df_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/plant-pathology-2021-fgvc8/train.csv",
        "/kaggle/input/plant-pathology-2021-fgvc8/sample_submission.csv",
    )
)

In [None]:
# Each `labels` value is a space-separated combination of atomic labels;
# flatten all distinct combinations into the set of atomic label names.
d_set = {
    name
    for combo in df_train.labels.unique()
    for name in combo.split(" ")
}
print(f"num of labels: {len(d_set)}  {d_set}")

In [None]:
def to_label(df):
    """
    Add an integer-encoded version of the ``labels`` column.

    Parameters
    ----------
    df : DataFrame
        Frame with a ``labels`` column. It is modified in place.

    Returns
    -------
    df : DataFrame
        The same frame, now carrying a ``labels_n`` column produced by a
        freshly fitted ``LabelEncoder``.
    """
    encoded = LabelEncoder().fit_transform(df.labels.values)
    df["labels_n"] = encoded
    return df

df_train = to_label(df_train)

# Keep the first row of every distinct (labels, labels_n) pair and index the
# result by the numeric code, sorted ascending, so that a class index can be
# mapped back to its label string.
df_labels_idx = (
    df_train.loc[~df_train.duplicated(["labels", "labels_n"]), ["labels_n", "labels"]]
    .set_index("labels_n")
    .sort_index()
)
display(df_labels_idx)

In [None]:
! ls /kaggle/input/finetuningmodelzoo

In [None]:
# Directory that is globbed for `*.jpg` files when building the test file list.
TEST_IMAGE_PATH = '../input/plant-pathology-2021-fgvc8/test_images'

In [None]:
def make_datapath_list(phase="train", val_size=0.25):
    """
    Function to create a PATH to the data.

    Parameters
    ----------
    phase : 'train' or 'val' or 'test'
        Specify whether to use Train data or test data.
    val_size : float
        Ratio of validation data to train data

    Returns
    -------
    path_list : list
        A list containing the PATH to the data. For 'train' and 'val' this is
        the corresponding side of a seeded (random_state=0) shuffled split of
        the training images; for 'test' it is every test image.

    Raises
    ------
    ValueError
        If ``phase`` is not one of 'train', 'val' or 'test'.
    """
    # Fail loudly: previously an unknown phase only printed a message and then
    # silently fell through to listing the test images.
    if phase not in ("train", "val", "test"):
        raise ValueError(
            f"{phase} not in path: phase must be 'train', 'val' or 'test'"
        )

    rootpath = "/kaggle/input/plant-pathology-2021-fgvc8/"
#     rootpath = "/kaggle/input/resized-plant2021/img_sz_256/"

    if phase == "test":
        image_dir = TEST_IMAGE_PATH
    else:
        # TRAIN_IMAGE_PATH is not defined anywhere in this notebook, which made
        # the train/val phases raise NameError. Honour it when some other cell
        # defines it, otherwise fall back to the competition's train folder.
        image_dir = globals().get(
            "TRAIN_IMAGE_PATH", osp.join(rootpath, "train_images")
        )

    # glob returns files in arbitrary, filesystem-dependent order; sorting makes
    # the seeded split below (and the test order) reproducible across runs.
    path_list = sorted(glob.glob(osp.join(image_dir, "*.jpg")))

    if phase == "test":
        return path_list

    train, val = train_test_split(
        path_list, test_size=val_size, random_state=0, shuffle=True
    )
    return train if phase == "train" else val

In [None]:
class ImageTransform():
    """
    Per-phase image preprocessing pipelines.

    Attributes
    ----------
    data_transform : dict
        Maps 'train', 'val' and 'test' to a ``transforms.Compose`` pipeline.
        The 'train' pipeline applies random augmentation; 'val' and 'test'
        share the same resize / centre-crop recipe.

    Constructor arguments
    ---------------------
    resize : int
        Target size passed to the resize and crop transforms.
    mean : (R, G, B)
        Average value for each color channel
    std : (R, G, B)
        Standard deviation for each color channel
    """

    def __init__(self, resize, mean, std):
        def build_eval_pipeline():
            # Resize -> centre crop -> tensor -> normalise, no randomness.
            return transforms.Compose([
                transforms.Resize(resize),
                transforms.CenterCrop(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std),
            ])

        # Training adds a random resized crop, horizontal flip and perspective
        # warp before the tensor conversion and normalisation.
        train_pipeline = transforms.Compose([
            transforms.Resize(resize),
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomPerspective(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])

        self.data_transform = {
            'train': train_pipeline,
            'val': build_eval_pipeline(),
            'test': build_eval_pipeline(),
        }

    def __call__(self, img, phase="train"):
        """
        Run ``img`` through the pipeline registered for ``phase``.

        Parameters
        ----------
        img : object
            Image handed to the selected pipeline.
        phase: 'train' or 'val' or 'test'
            Specify the mode of preprocessing
        """
        pipeline = self.data_transform[phase]
        return pipeline(img)

In [None]:
class PlantDataset(data.Dataset):
    """
    Class to create a Dataset

    Attributes
    ----------
    df_train : DataFrame
        DataFrame containing the image labels (``image`` and ``labels_n``
        columns are read for the 'train' and 'val' phases).
    df_labels_idx : DataFrame or None
        Code-to-label table. Taken from the ``df_labels_idx`` argument, or from
        a module-level ``df_labels_idx`` when the argument is omitted.
    file_list : list
        A list containing the paths to the images
    transform : object
        Instance of the preprocessing class (ImageTransform)
    phase : 'train' or 'val' or 'test'
        Specify whether to use train, validation, or test
    """
    def __init__(self, df_train, file_list, transform=None, phase='train',
                 df_labels_idx=None):
        """
        Raises
        ------
        ValueError
            If ``phase`` is not 'train', 'val' or 'test'.
        """
        # Reject bad phases up front; previously they surfaced only as an
        # UnboundLocalError on the first item access.
        if phase not in ("train", "val", "test"):
            raise ValueError(
                f"phase must be 'train', 'val' or 'test', got {phase!r}"
            )

        self.df_train = df_train
        # Backward compatibility: the original constructor read the notebook
        # global of the same name, so fall back to it when nothing is passed.
        if df_labels_idx is None:
            df_labels_idx = globals().get("df_labels_idx")
        self.df_labels_idx = df_labels_idx
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

        # Build the image -> label map once instead of scanning the whole frame
        # on every __getitem__ call. Not needed for the unlabeled test phase.
        if phase in ("train", "val"):
            self._label_by_image = self._build_label_lookup()
        else:
            self._label_by_image = {}

    def _build_label_lookup(self):
        """Return a dict mapping image file name to its ``labels_n`` value."""
        # keep="first" mirrors the original `.values[0]` choice on duplicates.
        first_rows = self.df_train.drop_duplicates(subset="image", keep="first")
        return dict(zip(first_rows["image"].values, first_rows["labels_n"].values))

    def _lookup_label(self, image_name):
        """Return the encoded label for ``image_name`` (-1 in the test phase)."""
        if self.phase == "test":
            return -1
        try:
            return self._label_by_image[image_name]
        except KeyError:
            raise KeyError(f"no label found for image {image_name!r}") from None

    def __len__(self):
        """
        Returns the number of images.
        """
        return len(self.file_list)

    def __getitem__(self, index):
        """
        Get data in Tensor format and labels of preprocessed images.

        Returns
        -------
        (img_transformed, label, image_name)
            The preprocessed image, its encoded label (-1 for the test phase)
            and the image file name.

        Raises
        ------
        KeyError
            In the 'train'/'val' phases, if the image has no row in df_train.
        """
        # Load the index number image.
        img_path = self.file_list[index]
        # The context manager closes the file handle; convert() forces three
        # channels so the 3-channel normalisation applies to any input mode.
        with Image.open(img_path) as opened:
            img = opened.convert("RGB")

        # Preprocessing images (skipped when no transform was supplied).
        if self.transform is None:
            img_transformed = img
        else:
            img_transformed = self.transform(img, self.phase)

        # image name: file name component, independent of its length
        # (previously the last 20 characters of the path).
        image_name = osp.basename(img_path)

        # Extract the labels from this dataset's own frame.
        label = self._lookup_label(image_name)

        return img_transformed, label, image_name

In [None]:
# Input resolution and per-channel normalisation constants
# (presumably the usual ImageNet statistics -- confirm against training).
size = 224
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

# Paths of the test images, wrapped in a dataset running in 'test' phase.
test_list = make_datapath_list("test")
test_dataset = PlantDataset(
    df_train,
    test_list,
    transform=ImageTransform(size, mean, std),
    phase='test',
)

In [None]:
batch_size = 128

# Inference loader: batches are produced in dataset order (no shuffling).
test_dataloader = data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Loaders keyed by phase; only the test phase exists in this notebook.
dataloaders_dict = dict(test=test_dataloader)

# Load Model

In [None]:
# Architecture to build and load weights for; the following cells compare this
# value against the names listed in the trailing comment.
SELECTION_MODEL = 'VGG16' # AlexNet, VGG16, DenseNet

In [None]:
if SELECTION_MODEL == 'AlexNet':
    # Randomly initialised AlexNet; its last classifier layer (index 6) is
    # replaced by a 4096 -> 12 linear layer.
    net = models.alexnet(pretrained=False)
    net.classifier[6] = nn.Linear(4096, 12)

In [None]:
if SELECTION_MODEL == 'VGG16':
    # Randomly initialised VGG16; its last classifier layer (index 6) is
    # replaced by a 4096 -> 12 linear layer.
    net = models.vgg16(pretrained=False)
    net.classifier[6] = nn.Linear(4096, 12)

In [None]:
if SELECTION_MODEL == 'DenseNet':
    net = models.densenet161(pretrained=False)
    prev_out_feature = net.classifier.in_features

    # Replacement head: four Linear -> ReLU -> Dropout stages of shrinking
    # width, followed by a final 12-way linear layer.
    new_last_layer = []
    fan_in = prev_out_feature
    for fan_out, drop_p in [(2208, 0.5), (1104, 0.5), (552, 0.2), (138, 0.2)]:
        new_last_layer.extend([
            nn.Linear(fan_in, fan_out),
            nn.ReLU(inplace=True),
            nn.Dropout(p=drop_p, inplace=False),
        ])
        fan_in = fan_out
    new_last_layer.append(nn.Linear(fan_in, 12))

    net.classifier = nn.Sequential(*new_last_layer)

In [None]:
# Checkpoint file for each supported architecture; any other value of
# SELECTION_MODEL leaves load_path as the empty string.
load_path = {
    'DenseNet': "../input/finetuningmodelzoo/densenet_second10epoch_fine_tuning_v1.h",
    'AlexNet': '/kaggle/input/finetuningmodelzoo/alexnet_final200epoch_fine_tuning_v1.h',
    'VGG16': '../input/finetuningmodelzoo/vgg16_final-50epoch_fine_tuning_v1.h',
}.get(SELECTION_MODEL, '')

In [None]:
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
#
# Always deserialize onto the CPU. The model has not been moved to a device at
# this point and load_state_dict copies values into its existing parameters, so
# this gives the same result as before on a GPU host without the extra GPU
# allocation. It also handles checkpoints saved on any device, whereas the old
# {"cuda:0": "cpu"} mapping only covered the first GPU.
load_weights = torch.load(load_path, map_location="cpu")
net.load_state_dict(load_weights)

In [None]:
class PlantPredictor():
    """
    Class for predicting labels from output results

    Attributes
    ----------
    net : torch.nn.Module
        Network used to score the images.
    df_labels_idx: DataFrame
        DataFrame that associates INDEX with a label name. Rows are selected
        by position, so row ``i`` must describe class index ``i``.
    dataloaders_dict : dict
        Must contain a 'test' entry yielding ``(inputs, label, image_name)``.
    df_submit : DataFrame
        Filled by ``inference`` with the columns ``image`` and ``labels``.
    """

    def __init__(self, net, df_labels_idx, dataloaders_dict):
        self.net = net
        self.df_labels_idx = df_labels_idx
        self.dataloaders_dict = dataloaders_dict
        self.df_submit = pd.DataFrame()

    def __predict_max(self, out):
        """
        Get the label name with the highest probability.

        Parameters
        ----------
        out : torch.Tensor
            Network output with one row of class scores per image.

        Returns
        -------
        df_predicted_label_name : DataFrame
            One row of ``df_labels_idx`` per image, chosen by the position of
            the highest score.
        """
        # .cpu() is a no-op for CPU tensors and makes the numpy conversion safe
        # when the scores still live on the GPU.
        maxid = np.argmax(out.detach().cpu().numpy(), axis=1)
        df_predicted_label_name = self.df_labels_idx.iloc[maxid]

        return df_predicted_label_name

    def _predict_batch(self, inputs, image_name, device):
        """Score one batch on ``device`` and return its image/label rows."""
        out = self.net(inputs.to(device))
        df_pred = self.__predict_max(out).reset_index(drop=True)
        df_pred["image"] = list(image_name)
        return df_pred

    def inference(self):
        """
        Predict a label for every batch of the 'test' loader.

        The result is stored in ``self.df_submit`` (columns ``image`` and
        ``labels``); an empty loader yields an empty frame with those columns.
        """
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print(f"Devices to be used : {device}")

        # Move the network once instead of once per batch.
        self.net.to(device)

        # Inference must run in eval mode, otherwise dropout keeps dropping
        # units and batch-norm keeps using batch statistics. The previous mode
        # is restored afterwards so the caller's network state is unchanged.
        was_training = self.net.training
        self.net.eval()

        df_pred_list = []
        try:
            # No gradients are needed, so skip building autograd graphs.
            with torch.no_grad():
                for inputs, _, image_name in tqdm(self.dataloaders_dict['test']):
                    df_pred_list.append(
                        self._predict_batch(inputs, image_name, device)
                    )
        finally:
            self.net.train(was_training)

        if df_pred_list:
            df_all = pd.concat(df_pred_list, axis=0)
        else:
            # pd.concat raises on an empty list; return an empty result instead.
            df_all = pd.DataFrame(columns=["image", "labels"])
        self.df_submit = df_all[["image", "labels"]].reset_index(drop=True)

In [None]:
# Wrap the loaded network, the code-to-label table and the loaders, then run
# prediction over the test loader.
predictor = PlantPredictor(
    net=net,
    df_labels_idx=df_labels_idx,
    dataloaders_dict=dataloaders_dict,
)
predictor.inference()

In [None]:
# Take an independent copy of the predictions and write it out as the
# submission file, without the index column.
df_submit = predictor.df_submit.copy(deep=True)
df_submit.to_csv(path_or_buf="/kaggle/working/submission.csv", index=False)