In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import glob
import matplotlib.pyplot as plt
import gc
import albumentations as A
import torchmetrics
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
import torch
import torchvision
from albumentations.pytorch import ToTensorV2
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import torchvision.models as models

In [None]:
# Start from a clean slate: drop cached GPU blocks and collect garbage.
torch.cuda.empty_cache()
gc.collect()
DEBUG = False
# Passed to cv2.resize as `dsize`, which OpenCV reads as (width, height).
DIMENTION = (256, 171) # image size
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data = pd.read_csv('/kaggle/input/plant-pathology-2021-fgvc8/train.csv')
# train_data = train_data.head(300)
test_images_path = '/kaggle/input/plant-pathology-2021-fgvc8/test_images/'
# glob yields paths in arbitrary order; sort so every run scores the images
# (and writes the submission rows) in the same order.
test_images_names = sorted(glob.glob(test_images_path + '*.jpg'))

# Turn the space-separated label string of each row into a list of labels.
train_data['labels'] = train_data['labels'].apply(lambda string: string.split(' '))
s = list(train_data['labels'])
mlb = MultiLabelBinarizer()
# Multi-hot encode the label lists: one indicator column per distinct label.
train_labels = pd.DataFrame(mlb.fit_transform(s), columns=mlb.classes_, index=train_data.index)

# Number of distinct labels; used as the output width of the models.
labels_size = len(train_labels.columns)

In [None]:
class PlantDataSet(Dataset):
    """Dataset yielding ``(image, labels)`` pairs for training or inference.

    The mode is selected by ``images_path``:

    * ``images_path`` is a string: ``dataset`` is a DataFrame with an
      ``image`` column of file names; every other column of the row is
      returned as the label tensor.
    * ``images_path`` is ``None``: ``dataset`` is an indexable sequence of
      image paths and the returned labels are an empty NumPy array.

    Args:
        dataset: DataFrame (labelled mode) or sequence of paths (unlabelled mode).
        images_path: Prefix prepended to each file name, or ``None``.
        transform: Optional callable invoked as ``transform(image=image)``;
            the ``'image'`` entry of its result replaces the image.
    """

    def __init__(self, dataset, images_path, transform=None):
        super().__init__()
        self.dataset = dataset
        self.images_path = images_path
        self.transform = transform

    def __getitem__(self, idx):
        """Load, colour-convert and resize sample ``idx``.

        Args:
            idx: Zero-based position of the sample.

        Returns:
            Tuple ``(image, labels)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        if self.images_path is not None:
            # Look rows up by position, not by index label: samplers hand out
            # 0..len-1, while the frame may carry a non-contiguous index
            # (for example after a train/validation split).
            file_path = self.images_path + self.dataset['image'].iloc[idx]
            label_columns = self.dataset.columns != 'image'
            labels = torch.tensor(self.dataset.iloc[idx, label_columns].tolist())
        else:
            file_path = self.dataset[idx]
            labels = np.array([])

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {file_path}")

        # OpenCV loads BGR; convert to RGB before resizing.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, DIMENTION)

        if self.transform:
            image = self.transform(image=image)['image']

        return image, labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.dataset)

In [None]:
# Batch size used when scoring the test images.
BS = 30

# Preprocessing: per-channel normalisation, then conversion to a tensor.
normalize_step = A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
transform = A.Compose([normalize_step, ToTensorV2()])

# Unlabelled mode: the dataset is the list of test image paths, no prefix.
test_dataset = PlantDataSet(dataset=test_images_names, images_path=None, transform=transform)
plants_test_data_loader = DataLoader(test_dataset, batch_size=BS, shuffle=False)

In [None]:
def test(test_dataloader, model):
    predictions = None
    model.eval()
    model = model.to(device)
    with torch.no_grad():
        for i, (images, _) in enumerate(test_dataloader):
            images = images.float()    
            images = images.to(device)
            
            output = model(images)
            probabilities = torch.sigmoid(output)
            if i == 0:
                predictions = probabilities.detach().cpu().numpy()
            else:
                predictions = np.concatenate((predictions, probabilities.detach().cpu().numpy()), axis=0)

            del images
            torch.cuda.empty_cache()
            gc.collect()
            
    return np.array(predictions)

In [None]:
def create_submission(test_images_path, predictions, threshold=0.75,
                      class_names=None, output_path='submission.csv'):
    """Turn per-class probabilities into a submission CSV.

    For each image, every class whose probability is strictly greater than
    ``threshold`` is selected; when none qualifies the image is labelled
    ``'healthy'``. The resulting table is displayed and written to disk.

    Args:
        test_images_path: Iterable of image paths, aligned with ``predictions``.
        predictions: Iterable of per-image probability sequences, ordered
            like ``class_names``.
        threshold: Minimum (exclusive) probability for a class to be selected.
        class_names: Class labels in prediction order. Defaults to the
            columns of the module-level ``train_labels`` frame.
        output_path: Destination of the CSV file.
    """
    if class_names is None:
        class_names = train_labels.columns

    # Build all rows first and create the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the frame on every call.
    records = []
    for image_path, scores in zip(test_images_path, predictions):
        image_name = image_path.split('/')[-1]
        selected = [label for score, label in zip(scores, class_names) if score > threshold]
        if not selected:
            selected = ['healthy']
        records.append({'image': image_name, 'labels': " ".join(selected)})

    submission_df = pd.DataFrame(records, columns=['image', 'labels'])

    # `display` is only injected into the namespace by IPython; import it
    # explicitly and fall back to print so the function also runs as a script.
    try:
        from IPython.display import display as show
    except ImportError:
        show = print
    show(submission_df)
    submission_df.to_csv(output_path, index=False)

# ResNet50 Inference Submission

In [None]:
# Load model
# resnet50 = models.resnet50(pretrained=False, num_classes=labels_size)
# resnet50.load_state_dict(torch.load('../input/resnet50-final/resnet50_final.pth'))

In [None]:
# test_predictions = test(plants_test_data_loader, resnet50)
# print(test_predictions)
# create_submission(test_images_names, test_predictions)

# ResNeXt50_32x4d Inference Submission

In [None]:
# Load model
# Build the architecture with one output per label column; the weights come
# from the checkpoint loaded below, not from a pretrained download.
resnext50 = models.resnext50_32x4d(pretrained=False, num_classes=labels_size)
# map_location remaps the stored tensors onto the active device, so the
# checkpoint also loads on a CPU-only runtime.
resnext50.load_state_dict(torch.load('../input/resnext50-32x4d-final/resnext50_32x4d_final.pth', map_location=device))

In [None]:
# Score the test images with the ResNeXt model, echo the raw probabilities,
# then build and write the submission file.
test_predictions = test(test_dataloader=plants_test_data_loader, model=resnext50)
print(test_predictions)
create_submission(test_images_path=test_images_names, predictions=test_predictions)