In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import glob
import os.path as osp

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pickle

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader, Dataset

from torchvision import datasets, models
from torchvision.utils import make_grid

import os
from PIL import Image
from IPython.display import display
import tqdm

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 12
pretrained = False
criterion = nn.CrossEntropyLoss()


class Config:
    """
    Named hyper-parameter presets.

    Each preset is a plain dict with the keys ``img_size``, ``batch_size``,
    ``epoch`` and ``optimizer`` (an optimizer class, not an instance).
    """
    ep1 = {
        "img_size": 224,
        "batch_size": 64,
        "epoch": 20,
        "optimizer": Adam
    }
    ep2 = {
        "img_size": 224,
        "batch_size": 64,
        "epoch": 20,
        "optimizer": SGD
    }


# Read the active preset by key. Unpacking ``.values()`` would silently assign the
# wrong settings if the keys of a preset were ever reordered or extended.
img_size, batch_size, epoch, optimizer = (
    Config.ep2[key] for key in ("img_size", "batch_size", "epoch", "optimizer")
)

In [None]:
# Competition metadata: the training label table and the sample submission file.
df_train = pd.read_csv(
    filepath_or_buffer="/kaggle/input/plant-pathology-2021-fgvc8/train.csv"
)
df_sub = pd.read_csv(
    filepath_or_buffer="/kaggle/input/plant-pathology-2021-fgvc8/sample_submission.csv"
)

In [None]:
# Number of distinct label strings. It is printed explicitly because a notebook
# cell only renders its final expression; a bare expression here is discarded.
print(f"Number of distinct labels: {len(df_train.labels.unique())}")

# Number of training rows per label (rendered as the cell output).
df_train.labels.value_counts()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Shared encoder instance; encode_label() calls fit_transform on it.
le = LabelEncoder()

In [None]:
def encode_label(df):
    """
    Add an ``encoded_label`` column computed from ``df.labels``.

    The module-level encoder ``le`` is fitted on the label values on every call.
    The frame is modified in place and the same object is returned.
    """
    label_values = df.labels.values
    encoded = le.fit_transform(label_values)
    df['encoded_label'] = encoded
    return df

# Adds the 'encoded_label' column to df_train in place.
encode_label(df_train)

# Keep the first row of every distinct (labels, encoded_label) pair, then index and
# order the result by the encoded value.
df_labels_idx = (
    df_train.loc[
        ~df_train.duplicated(["labels", "encoded_label"]),
        ["encoded_label", "labels"],
    ]
    .set_index("encoded_label")
    .sort_index()
)
display(df_labels_idx)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
def make_datapath_list(phase='train', val_size=0.25):
    """
    Build the list of ``.jpg`` file paths for one phase.

    'train' and 'val' are two parts of a single seeded split of the resized
    training images; 'test' lists the competition test images.

    Parameters
    ----------
    phase : 'train' or 'val' or 'test'
        Which set of paths to return.
    val_size : float
        Passed to ``train_test_split`` as ``test_size`` for the train/val split.

    Returns
    -------
    list of str
        Image paths for the requested phase.

    Raises
    ------
    ValueError
        If ``phase`` is not one of 'train', 'val' or 'test'.
    """
    if phase in ("train", "val"):
        rootpath = "/kaggle/input/resized-plant2021/img_sz_640/"
    elif phase == "test":
        rootpath = "/kaggle/input/plant-pathology-2021-fgvc8/test_images/"
    else:
        # Previously this case only printed a message and then silently listed the
        # test directory.
        raise ValueError(f"Unknown phase {phase!r}; expected 'train', 'val' or 'test'")

    # glob returns files in arbitrary order. Sorting makes the seeded split below
    # the same on every call, so separate 'train' and 'val' calls cannot overlap.
    path_list = sorted(glob.glob(osp.join(rootpath, "*.jpg")))

    if phase == "test":
        return path_list

    train, val = train_test_split(path_list, test_size=val_size, random_state=0, shuffle=True)
    return train if phase == "train" else val

In [None]:
# Train/val listing is disabled in this notebook; only the test paths are built.
# train_list = make_datapath_list(phase='train')
# print(f'The length of training set: {len(train_list)}')
# val_list = make_datapath_list(phase='val')
# print(f'The length of valuation set: {len(val_list)}')
test_list = make_datapath_list('test')
print(
    f'The length of testing set: {len(test_list)}'
)

In [None]:
import albumentations as A
from albumentations import Compose
from albumentations.pytorch import ToTensorV2
import cv2

In [None]:
def _build_eval_pipeline():
    """Return a fresh resize -> normalize -> tensor pipeline (used for 'val' and 'test')."""
    return Compose([
        A.Resize(img_size, img_size),
        A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2()
    ])


# Training pipeline steps, in application order. The augmentations come first,
# then the resize to the configured size, dropout, normalization and tensor conversion.
_train_steps = [
    A.Rotate(
        always_apply=False,
        p=0.1,
        limit=(-68, 178),
        interpolation=1,
        border_mode=0,
        value=(0, 0, 0),
        mask_value=None
    ),
    A.RandomShadow(
        num_shadows_lower=1,
        num_shadows_upper=1,
        shadow_dimension=3,
        shadow_roi=(0, 0.6, 1, 1),
        p=0.4
    ),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.6),
    A.RandomFog(fog_coef_lower=0.2, fog_coef_upper=0.2, alpha_coef=0.2, p=0.3),
    A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3),
    A.RandomBrightnessContrast(p=0.3),
    A.GaussNoise(var_limit=(50, 70), always_apply=False, p=0.3),
    A.Resize(height=img_size, width=img_size),
    A.CoarseDropout(
        max_holes=5,
        max_height=5,
        max_width=5,
        min_holes=3,
        min_height=5,
        min_width=5,
        always_apply=False,
        p=0.2
    ),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
]

# One pipeline per phase; PlantDataset selects its entry with transform[phase].
transform = {
    'train': Compose(_train_steps),
    'val': _build_eval_pipeline(),
    'test': _build_eval_pipeline()
}

In [None]:
class PlantDataset(Dataset):
    """
    Dataset yielding ``(transformed image, label, image file name)`` triples.

    Attributes
    ----------
    df_train : DataFrame
        Frame with an ``image`` column (file names) and an ``encoded_label``
        column. It is only consulted in the 'train' and 'val' phases.
    df_labels_idx : DataFrame
        The module-level ``df_labels_idx`` captured at construction time. It is
        not a constructor argument and is not used by this class itself.
    file_list : list
        A list containing the paths to the images.
    transform : callable or None
        The entry of the ``transform`` mapping selected by ``phase``; it is called
        as ``transform(image=img)``. ``None`` when no mapping was supplied.
    phase : 'train' or 'val' or 'test'
        Specify whether to use train, validation, or test.
    """

    _PHASES = ("train", "val", "test")

    def __init__(self, df_train, file_list, transform=None, phase='train'):
        """
        Parameters
        ----------
        df_train : DataFrame
            Label table (see class docstring).
        file_list : list
            Image paths served by this dataset.
        transform : mapping or None
            Mapping from phase name to a callable pipeline. ``None`` disables
            preprocessing.
        phase : 'train' or 'val' or 'test'

        Raises
        ------
        ValueError
            If ``phase`` is not a supported phase name.
        """
        if phase not in self._PHASES:
            raise ValueError(f"Unknown phase {phase!r}; expected one of {self._PHASES}")
        self.df_train = df_train
        self.df_labels_idx = df_labels_idx
        self.file_list = file_list
        # The default transform=None used to crash here on ``None[phase]``.
        self.transform = transform[phase] if transform is not None else None
        self.phase = phase

    def __len__(self):
        """
        Returns the number of images.
        """
        return len(self.file_list)

    def __getitem__(self, index):
        """
        Load, preprocess and label the image at ``index``.

        Returns
        -------
        tuple
            ``(img_transformed, label, image_name)``. ``img_transformed`` is the
            pipeline output, or ``{"image": img}`` with the RGB array when no
            pipeline is set. ``label`` is the ``encoded_label`` of the image in
            'train' and 'val', and ``-1`` in 'test'.

        Raises
        ------
        FileNotFoundError
            If the image cannot be read.
        """
        img_path = self.file_list[index]

        # cv2.imread reports failure by returning None instead of raising; check it
        # here so the error names the offending file.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img_transformed = self.transform(image=img)
        else:
            img_transformed = {"image": img}

        # File name without directories (previously a fixed 20-character slice).
        image_name = osp.basename(img_path)

        if self.phase == "test":
            label = -1
        else:
            # Look the label up in the frame given to this dataset, not in the
            # module-level df_train.
            label = self.df_train.loc[
                self.df_train["image"] == image_name, "encoded_label"
            ].values[0]

        return img_transformed, label, image_name

In [None]:
# train_dataset = PlantDataset(df_train, train_list, transform=transform, phase='train')
# val_dataset = PlantDataset(df_train, val_list, transform=transform, phase='val')
test_dataset = PlantDataset(df_train, test_list, transform=transform, phase='test')

# Position of the sample used for the smoke test below.
index = 0

# print("【train dataset】")
# print(f"img num : {train_dataset.__len__()}")
# # print(f"img : {train_dataset.__getitem__(index)[0].size()}")
# print(f"label : {train_dataset.__getitem__(index)[1]}")
# print(f"image name : {train_dataset.__getitem__(index)[2]}")

# print("\n【validation dataset】")
# print(f"img num : {val_dataset.__len__()}")
# # print(f"img : {val_dataset.__getitem__(index)[0].size()}")
# print(f"label : {val_dataset.__getitem__(index)[1]}")
# print(f"image name : {val_dataset.__getitem__(index)[2]}")

print("\n【test dataset】")
print(f"img num : {len(test_dataset)}")
# Fetch the sample once: every access reads and transforms the image again, so
# separate accesses for the label and the name did that work twice.
_, sample_label, sample_name = test_dataset[index]
# print(f"img : {test_dataset.__getitem__(index)[0].size()}")
print(f"label : {sample_label}")
print(f"image name : {sample_name}")

In [None]:
# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2,shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=2, shuffle=False)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Loaders keyed by phase name; only the 'test' loader is built here.
dataloaders_dict = dict(test=test_dataloader)

In [None]:
# Load the trained model object from a pickle file.
# NOTE(security): pickle.load can execute arbitrary code contained in the file,
# so only load pickles that come from a source you trust.
Pkl_Filename = '../input/finalproj/Resmodel50_trained_1fc.pkl'
with open(Pkl_Filename, 'rb') as file:  
    model = pickle.load(file)
    
# Disabled alternative: an unpickler that redirects torch storage loading to the CPU.
# NOTE(review): as written it uses `io`, which the visible cells never import, and
# passes a path string where pickle.Unpickler expects an open binary file — fix
# both before enabling.
# class CPU_Unpickler(pickle.Unpickler):
#     def find_class(self, module, name):
#         if module == 'torch.storage' and name == '_load_from_bytes':
#             return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
#         else: return super().find_class(module, name)
        
# model = CPU_Unpickler("../input/final2/Resmodel50_trained_1fc.pkl").load()


In [None]:
class PlantPredictor():
    """
    Run a model over the 'test' dataloader and collect the predicted label names.

    Attributes
    ----------
    model : torch.nn.Module
        Model called on each input batch.
    df_labels_idx : DataFrame
        DataFrame that associates INDEX with a label name. Rows are selected by
        position using the predicted class index; it must have a ``labels`` column.
    dataloaders_dict : dict
        Mapping with a 'test' entry. Each batch is indexed as
        ``batch[0]['image']`` (input tensor) and ``batch[2]`` (image names).
    df_submit : DataFrame
        Filled by ``inference()`` with the columns ``image`` and ``labels``.
    """

    def __init__(self, model, df_labels_idx, dataloaders_dict):
        self.model = model
        self.df_labels_idx = df_labels_idx
        self.dataloaders_dict = dataloaders_dict
        self.df_submit = pd.DataFrame()

    def __predict_max(self, out):
        """
        Get the label rows with the highest score for each sample.

        Parameters
        ----------
        out : torch.Tensor
            Model output; the argmax is taken along axis 1.

        Returns
        -------
        DataFrame
            Rows of ``df_labels_idx`` selected positionally by the argmax, one per
            sample, in batch order.
        """
        maxid = np.argmax(out.detach().cpu().numpy(), axis=1)
        return self.df_labels_idx.iloc[maxid]

    def inference(self):
        """
        Predict a label for every sample of the 'test' loader and store the result
        in ``self.df_submit`` (columns ``image`` and ``labels``).

        The model is moved to the GPU when available and switched to evaluation
        mode; both changes persist on the model after the call.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        print(f"Devices to be used : {device}")

        # Move the model once. The old loop overwrote `device` with the CPU after
        # the first batch, so every later batch ran on the CPU.
        self.model.to(device)
        # Evaluation mode so train-time layer behaviour is switched off for prediction.
        self.model.eval()

        df_pred_list = []
        # No gradients are needed for prediction.
        with torch.no_grad():
            for data in self.dataloaders_dict['test']:
                image_name = data[2]
                inputs = data[0]['image'].to(device)
                out = self.model(inputs)
                df_pred = self.__predict_max(out).reset_index(drop=True)
                df_pred["image"] = list(image_name)
                df_pred_list.append(df_pred)

        if df_pred_list:
            df_all = pd.concat(df_pred_list, axis=0)
        else:
            # pd.concat raises on an empty list; an empty loader yields an empty frame.
            df_all = pd.DataFrame(columns=["image", "labels"])
        self.df_submit = df_all[["image", "labels"]].reset_index(drop=True)

In [None]:
# Build the predictor from the loaded model, the label lookup table and the loaders,
# then run it; the predictions end up in predictor.df_submit.
predictor = PlantPredictor(
    model=model,
    df_labels_idx=df_labels_idx,
    dataloaders_dict=dataloaders_dict,
)
predictor.inference()

In [None]:
# Work on a copy so the predictor's own frame stays untouched.
df_submit = predictor.df_submit.copy()

# Write the submission without the index column, then render the frame as cell output.
df_submit.to_csv(path_or_buf='submission.csv', index=False)
df_submit