In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import StratifiedKFold
from glob import glob

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning import Callback
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.metrics.classification import AUROC

import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet18

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# --- Run configuration -------------------------------------------------------
# Everything a reader might want to tune lives here.

RANDOM_SEED = 42      # seed applied below and intended for every stochastic step
TRAIN_BATCH = 4       # batch size used by the data loaders
WORKING_DIR = "/kaggle/input/plant-pathology-2021-fgvc8/"  # competition input data
MODELS_DIR = '/kaggle/working'
K_FOLD_NUM = 1        # which fold is held out for validation
CLASSES_NUM = 6       # length of the multi-hot target vector

WEIGHT_DECAY = 1e-5
EPOCH_NUM = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The seed constant was previously defined but never applied; seed the RNGs once,
# up front, so that a "Restart & Run All" is repeatable.
seed_everything(RANDOM_SEED)
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the competition annotations (columns used later: `image`, `labels`).
train_df = pd.read_csv(f"{WORKING_DIR}train.csv")

# Quick look at the first rows, then the class balance of the raw label strings.
print(train_df.head())
train_df["labels"].value_counts()

In [None]:
# Show the first few original training images that carry one specific label string.
label_name = "scab frog_eye_leaf_spot complex"
train_img_names = train_df.loc[train_df["labels"] == label_name, "image"].head()

for name in train_img_names:
    train_img_path = WORKING_DIR + "train_images/" + name
    print(train_img_path)

    im = cv2.imread(train_img_path)
    if im is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print("could not read " + train_img_path)
        continue

    # OpenCV decodes to BGR while matplotlib expects RGB; without this
    # conversion the red and blue channels are swapped in the preview.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    plt.title(name + " label: " + label_name)
    plt.imshow(im)
    plt.show()



In [None]:
# Assign every image to one of N_SPLITS stratified folds.
N_SPLITS = 5
# Use the notebook-wide seed instead of a second hard-coded 42.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED)

# One row per image (index = image name) carrying that image's label string.
df_folds = train_df.groupby('image')['labels'].min().to_frame()

# Stratification key: the label string with spaces replaced by "_" followed by
# the number of space-separated labels, e.g. "scab_complex2".
label_text = df_folds['labels']
df_folds['stratify_group'] = (
    label_text.str.replace(' ', '_', regex=False)
    + (label_text.str.count(' ') + 1).astype(str)
)

# Each image appears in exactly one validation split; record which one.
fold_ids = np.zeros(len(df_folds), dtype=int)
for fold_idx, (_, val_index) in enumerate(
    skf.split(X=df_folds.index, y=df_folds['stratify_group'])
):
    fold_ids[val_index] = fold_idx
df_folds['fold'] = fold_ids

df_folds = df_folds.reset_index()

# Fold held out for validation in this run.
fold_number = K_FOLD_NUM
# NOTE(review): a later cell rebinds `train_df` to the training split, so
# re-running this cell without reloading the CSV snapshots the reduced frame.
train_df_raw = train_df.copy()


In [None]:
# Label distribution of the images assigned to fold 0.
df_folds.loc[df_folds["fold"] == 0, "labels"].value_counts()

In [None]:
# Images in the selected fold become the validation set; every other fold trains.
in_selected_fold = df_folds['fold'] == fold_number
valid_images = df_folds.loc[in_selected_fold, 'image']
train_images = df_folds.loc[~in_selected_fold, 'image']

valid_df = train_df_raw[train_df_raw['image'].isin(valid_images)].reset_index(drop=True)
train_df = train_df_raw[train_df_raw['image'].isin(train_images)].reset_index(drop=True)

In [None]:
# Class balance of the training split.
train_df["labels"].value_counts()

In [None]:
class PlantDataset(Dataset):
    """Image dataset yielding a transformed image, a multi-hot target and the image id.

    Args:
        df: DataFrame with an ``image`` column (file names) and a ``labels``
            column (label strings such as ``"scab frog_eye_leaf_spot complex"``).
        dir_path: Prefix that is string-concatenated with the file name, so it
            must end with a path separator.
        training: When truthy, augmentations (cutout, blur) are applied before
            the resize/normalise/to-tensor steps.
    """

    # Order of the entries in the multi-hot target vector.
    CLASS_NAMES = ("scab", "healthy", "frog_eye_leaf_spot", "rust", "complex", "powdery_mildew")

    def __init__(self, df, dir_path, training=True):
        self.dir_path = dir_path
        self.df = df
        self.img_ids = self.df.image.unique()
        self.training = training

        # First label string per image, looked up by id instead of scanning the
        # whole DataFrame for every sample.
        self._label_by_id = (
            self.df.drop_duplicates(subset="image").set_index("image")["labels"].to_dict()
        )
        # Transform pipelines are built lazily and reused, keyed by the training flag.
        self._pipelines = {}

    def __getitem__(self, index):
        """Return ``{"image", "target", "img_id"}`` for the image at ``index``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_id = self.img_ids[index]
        target = self.encode_target(self._label_by_id[img_id])

        img_path = self.dir_path + img_id
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread returns None instead of raising; fail with a clear message.
            raise FileNotFoundError("Could not read image: " + img_path)

        # OpenCV decodes to BGR; convert to RGB before normalisation.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image = self.transform()(image=image)["image"]

        return {"image": image, "target": torch.FloatTensor(target), "img_id": img_id}

    def get_by_id(self, img_id):
        """Return the sample whose file name equals ``img_id``.

        Raises:
            IndexError: if ``img_id`` is not part of this dataset.
        """
        matches = np.where(self.img_ids == img_id)[0]
        if matches.size == 0:
            raise IndexError(f"image id {img_id!r} is not in this dataset")
        return self[matches[0]]

    def transform(self):
        """Return the albumentations pipeline for the current ``training`` flag."""
        is_training = bool(self.training)
        if is_training not in self._pipelines:
            augmentations = []
            if is_training:
                augmentations = [
                    A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.5),
                    A.Blur(p=0.5),
                ]
            self._pipelines[is_training] = A.Compose(augmentations + [
                A.Resize(512, 512),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2(p=1.0),
            ])
        return self._pipelines[is_training]

    def encode_target(self, target):
        """Encode a label string as a list of 0/1 ints ordered like ``CLASS_NAMES``.

        A class is marked present when its name occurs as a substring of ``target``.
        """
        return [int(class_name in target) for class_name in self.CLASS_NAMES]

    def __len__(self):
        return self.img_ids.shape[0]
       

In [None]:
# Images are read from the 512px resized copy of the dataset rather than from
# WORKING_DIR + "train_images/".
train_path = "/kaggle/input/resized-plant2021/img_sz_512/"

# Training split gets augmentations; the validation split does not.
train_dataset = PlantDataset(df=train_df, dir_path=train_path, training=True)
valid_dataset = PlantDataset(df=valid_df, dir_path=train_path, training=False)

In [None]:
# Preview the first few training samples as the model will see them
# (after augmentation, resize and normalisation).
N_PREVIEW = 3

# Mean/std passed to A.Normalize in the dataset; used here to undo the
# normalisation so that imshow receives values in [0, 1].
NORM_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
NORM_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# One figure with a panel per sample, instead of a new huge figure per iteration.
fig, axes = plt.subplots(1, N_PREVIEW, figsize=(5 * N_PREVIEW, 5), squeeze=False)
for i, ax in enumerate(axes[0]):
    img_dict = train_dataset[i]

    img = img_dict["image"]
    name = img_dict["img_id"]
    # Print a short summary rather than dumping the whole tensor.
    print(name, tuple(img.shape))

    # CHW tensor -> HWC array, then invert the normalisation and clip.
    rgb = img.permute(1, 2, 0).numpy() * NORM_STD + NORM_MEAN
    ax.imshow(np.clip(rgb, 0.0, 1.0))
    ax.set_title(name)
    ax.axis("off")
plt.show()

In [None]:
# Fetch a single training sample by its file name.
# NOTE(review): get_by_id takes element [0] of the np.where match, so this raises
# IndexError when the id is not among train_dataset.img_ids (e.g. if this image
# fell into the validation fold) — confirm it belongs to the training split.
train_dataset.get_by_id("8002cb321f8bfcdf.jpg")

In [None]:
# Both loaders share batch size and worker count; only the training loader shuffles.
# The default collate function is used.
loader_kwargs = dict(batch_size=TRAIN_BATCH, num_workers=4)

train_data_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
valid_data_loader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)

In [None]:
### Lightning usage

class LitModel(pl.LightningModule):
    """LightningModule wrapping a backbone for multi-label classification.

    Trains with ``BCEWithLogitsLoss`` and reports an F1 score computed on
    predictions obtained by thresholding the sigmoid of the model output.

    Args:
        model: ``torch.nn.Module`` that maps an image batch to one raw score
            per class.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.lr = 5e-3
        self.threshold = 0.5
        # The metric's keyword is `num_classes`; the previous `CLASSES_NUM=`
        # keyword is not one the constructor accepts.
        # Kept as the single attribute `metric` so module attribute names
        # (and therefore checkpoint keys) do not change.
        self.metric = pl.metrics.F1(num_classes=CLASSES_NUM)
        self.criterion = torch.nn.BCEWithLogitsLoss()

    def forward(self, x, *args, **kwargs):
        """Return the raw (pre-sigmoid) scores of the wrapped model."""
        return self.model(x)

    def configure_optimizers(self):
        """Adam with weight decay plus cosine annealing over the training epochs."""
        self.optimizer = torch.optim.Adam(
            self.parameters(), lr=self.lr, weight_decay=WEIGHT_DECAY
        )
        # Anneal over the configured number of epochs instead of a second literal 20.
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=EPOCH_NUM, eta_min=1e-6
        )

        return {"optimizer": self.optimizer, "lr_scheduler": self.scheduler}

    def _binarize(self, output):
        """Turn raw scores into 0/1 predictions using ``self.threshold``."""
        probs = torch.sigmoid(output.detach())
        return (probs > self.threshold).double()

    def _evaluate(self, batch):
        """Compute ``(loss, f1)`` for one batch without logging anything."""
        image = batch["image"]
        target = batch["target"]

        output = self.model(image)
        loss = self.criterion(output, target)
        f1 = self.metric(self._binarize(output), target)
        return loss, f1

    def training_step(self, batch, batch_idx):
        """Log epoch-level loss, F1 and learning rate; return the loss."""
        # F1 is computed on thresholded sigmoid outputs, the same way as in
        # validation, rather than on raw scores.
        loss, metric = self._evaluate(batch)

        logs = {"training_loss": loss, "train_f1": metric, "lr": self.optimizer.param_groups[0]["lr"]}

        self.log_dict(logs, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log and return ``valid_loss`` and ``valid_f1`` for one batch."""
        loss, metric = self._evaluate(batch)

        logs = {"valid_loss": loss, "valid_f1": metric}

        self.log_dict(logs, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return logs

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_f1`` for one batch."""
        # Evaluate directly so a test run does not also log `valid_*` entries.
        loss, metric = self._evaluate(batch)
        metrics = {'test_f1': metric, 'test_loss': loss}
        self.log_dict(metrics)

    def fill_zero_preds(self, logits, preds):
        """Guarantee at least one positive class per row.

        Currently not called from any step.

        Args:
            logits: Per-class scores (raw or sigmoid) used to pick the best class.
            preds: 0/1 predictions with the same shape as ``logits``.

        Returns:
            Double tensor equal to ``preds`` except that rows with no positive
            entry get a 1 at the position of their highest score.
        """
        # The best class must come from the scores: argmax over an all-zero
        # prediction row would always select index 0.
        best_class = torch.argmax(logits, dim=-1)

        preds_no_zeros = preds.clone().double()
        empty_rows = (preds_no_zeros == 0).all(dim=-1)
        # Index-assignment stays on the device of `preds`.
        preds_no_zeros[empty_rows, best_class[empty_rows]] = 1.0

        return preds_no_zeros

In [None]:
# Backbone: torchvision ResNet-18 requested with pretrained=True.
model = resnet18(pretrained=True)

In [None]:
# Swap the classification head for one with CLASSES_NUM outputs.
# The input width is read from the existing head instead of hard-coding 512,
# and the output width follows the config constant instead of a literal 6.
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=CLASSES_NUM, bias=True)

# The first entry yielded by model.modules() is the model itself, so this
# prints the full architecture including the new head.
print(model)


In [None]:
# TESTING
# path = "../input/pp21-resnet-e10-fold1/pp21_resnet_e10_fold1--valid_f18661.ckpt"
# lit_model = LitModel(model).load_from_checkpoint(path,model=model)

# logger = CSVLogger(save_dir='logs_test/', name="Resnet")
# trainer = Trainer(deterministic=True, logger=logger)
# trainer.test(model=lit_model,test_dataloaders=valid_data_loader,verbose=True)

In [None]:
# Wrap the ResNet in the LitModel LightningModule so the Trainer can drive it.
lit_model = LitModel(model)

In [None]:
# Metrics are written as CSV under logs/Resnet/.
logger = CSVLogger(save_dir='logs/', name="Resnet")

# Keep the best checkpoint by validation loss plus the last one (weights only).
# `fold` is never logged as a metric, so Lightning cannot fill a `{fold}`
# placeholder with the real value; the held-out fold number is formatted into
# the name here and only the logged quantities are left as placeholders.
checkpoint_callback = ModelCheckpoint(
    monitor='valid_loss',
    mode='min',
    save_top_k=1,
    save_last=True,
    save_weights_only=True,
    filename=f'checkpoint/fold={fold_number:02d}-{{epoch:02d}}-{{valid_loss:.4f}}-{{valid_f1:.4f}}',
    verbose=False,
)

# Fall back to CPU / full precision when no GPU is available instead of failing.
use_gpu = torch.cuda.is_available()

trainer = Trainer(
    max_epochs=EPOCH_NUM,
    gpus=1 if use_gpu else 0,
    accumulate_grad_batches=1,
    precision=16 if use_gpu else 32,
    # ModelCheckpoint instances are passed through `callbacks`.
    # Optional: EarlyStopping(monitor='valid_loss', patience=3, mode='min')
    callbacks=[checkpoint_callback],
    logger=logger,
    weights_summary='top',
    profiler="simple",
)

In [None]:
# Fit lit_model on train_data_loader, validating on valid_data_loader.
trainer.fit(lit_model, train_dataloader=train_data_loader, val_dataloaders=valid_data_loader)

In [None]:
# Plot the per-epoch curves recorded by the CSV logger and save them next to the log.
metrics = pd.read_csv(f'{trainer.logger.log_dir}/metrics.csv')


def plot_metric_curves(metrics_df, curves, ylabel, legend_loc, out_path):
    """Draw one figure with one line per logged column and save it as an image.

    Args:
        metrics_df: DataFrame read from the logger's ``metrics.csv``.
        curves: Iterable of ``(column, color, marker, label)`` tuples.
        ylabel: Text for the y axis.
        legend_loc: Matplotlib legend location string.
        out_path: File the figure is written to.
    """
    plt.figure(figsize=(7, 6))
    plt.grid(True)
    for column, color, marker, label in curves:
        # Each metric is only filled on some rows; drop the gaps and
        # re-index so the x axis counts logged values.
        values = metrics_df[column].dropna().reset_index(drop=True)
        plt.plot(values, color=color, marker=marker, label=label)
    plt.ylabel(ylabel, fontsize=24)
    plt.xlabel('Epoch', fontsize=24)
    plt.legend(loc=legend_loc, fontsize=18)
    plt.savefig(out_path)


plot_metric_curves(
    metrics,
    [('train_f1', 'r', 'o', 'train/f1'), ('valid_f1', 'b', 'x', 'valid/f1')],
    ylabel='F1',
    legend_loc='lower right',
    out_path=f'{trainer.logger.log_dir}/f1.png',
)

plot_metric_curves(
    metrics,
    [('training_loss', 'r', 'o', 'train/loss'), ('valid_loss', 'b', 'x', 'valid/loss')],
    ylabel='Loss',
    legend_loc='upper right',
    out_path=f'{trainer.logger.log_dir}/loss.png',
)

plot_metric_curves(
    metrics,
    [('lr', 'g', 'o', 'learning rate')],
    ylabel='LR',
    legend_loc='upper right',
    out_path=f'{trainer.logger.log_dir}/lr.png',
)