In [None]:
!pip install wtfml==0.0.2
!pip install efficientnet_pytorch

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from PIL import Image

from sklearn import model_selection
from sklearn import metrics

import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import efficientnet_pytorch

import albumentations as A

from wtfml.utils import EarlyStopping
from wtfml.engine import Engine
from wtfml.data_loaders.image import ClassificationLoader

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Folders holding the competition's training and test images.
train_dir = '../input/cassava-leaf-disease-classification/train_images'
test_dir = '../input/cassava-leaf-disease-classification/test_images'

# Directory listings: t for the training folder, t1 for the test folder.
t, t1 = os.listdir(train_dir), os.listdir(test_dir)

# Print the number of training files, the number of test files, and their sum.
print(len(t), len(t1), len(t) + len(t1))

In [None]:
# Load the competition's train.csv into a DataFrame and preview its first rows.
df = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
df.head()

In [None]:
# Read the label-number -> disease-name JSON as a pandas Series, then use it to
# add a 'disease' column looked up from each row's 'label' value.
label_to_disease = pd.read_json(("../input/cassava-leaf-disease-classification/label_num_to_disease_map.json"), typ='series')
df['disease'] = df['label'].map(label_to_disease)
df.head()

In [None]:
'''check the count of the various disease types'''

#visualization imports
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns
%matplotlib inline


# Bar chart of how many rows carry each label value.
# The column is passed by keyword: a positional Series triggers a FutureWarning on
# seaborn 0.11 and is interpreted as `data` (not `x`) on seaborn >= 0.12.
sns.countplot(x='label', data=df)
plt.title('Count of the various disease types in Cassava leaves')
plt.grid()
plt.show()

In [None]:
import pandas as pd
from sklearn import model_selection


# Assign every row of df to one of 5 stratified folds and save the result.

# start with a placeholder fold id of -1 on every row
df["kfold"] = -1
# shuffle the rows; the shuffle is seeded so the fold assignment (and therefore
# train_folds.csv) is identical on every run of the notebook
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
# the labels are what the folds are stratified on
y = df.label.values
# 5-fold stratified splitter (no extra shuffling: the rows were shuffled above)
kf = model_selection.StratifiedKFold(n_splits=5)

# v_ holds the row positions of fold f's validation part; because the index was
# reset above, those positions are also valid labels for df.loc
for f, (t_, v_) in enumerate(kf.split(X=df, y=y)):
    df.loc[v_, 'kfold'] = f
# write the frame, now including the kfold column, to disk
df.to_csv("train_folds.csv", index=False)

In [None]:
# Read back the train_folds.csv file written by the fold-assignment cell and display it.
df_train_folds =pd.read_csv("train_folds.csv")
df_train_folds

In [None]:
'''check the count after applying Stratified kfold'''

#visualization imports
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns
%matplotlib inline


# Bar chart of how many rows were assigned to each fold.
# The column is passed by keyword: a positional Series triggers a FutureWarning on
# seaborn 0.11 and is interpreted as `data` (not `x`) on seaborn >= 0.12.
sns.countplot(x='kfold', data=df)
# The previous title was copied from the label plot and described disease types.
plt.title('Number of images in each fold')
plt.grid()
plt.show()

In [None]:
# Quick-experiment subset: only the first 200 rows of the original train.csv are
# read, then split 90/10 into training and validation parts, stratified on the
# label column. (The kfold column computed earlier is not used here.)
dfx = pd.read_csv(
    '../input/cassava-leaf-disease-classification/train.csv',
    nrows=200,
)
df_train, df_valid = model_selection.train_test_split(
    dfx,
    test_size=0.1,
    random_state=42,
    stratify=dfx["label"].values,
)
# row counts of the two parts
len(df_train), len(df_valid)


In [None]:
# Give both splits a fresh 0..n-1 index, discarding the old index values.
df_train, df_valid = (
    part.reset_index(drop=True) for part in (df_train, df_valid)
)
df_train.shape


In [None]:
# Path of every training image: the image folder joined with each image_id.
train_images = [
    os.path.join(train_dir, image_id)
    for image_id in df_train["image_id"].values
]
train_images[1]

In [None]:
# Path of every validation image; these files also live in the training image folder.
valid_images = [
    os.path.join(train_dir, image_id)
    for image_id in df_valid["image_id"].values
]
valid_images[:5]

In [None]:
# Label arrays for the two splits, in the same row order as the image path lists.
train_targets, valid_targets = df_train["label"].values, df_valid["label"].values

In [None]:
# Show the label stored at position 1 of the training targets.
train_targets[1]

In [None]:
!pip install tez
import tez
from tez.datasets import ImageDataset
from tez.callbacks import EarlyStopping

In [None]:
import albumentations

def _make_normalize():
    """Build the per-channel Normalize step shared by both pipelines.

    The mean/std values look like the usual ImageNet statistics -- TODO confirm
    that this is what the pretrained backbone expects.
    """
    return albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: random 256x256 crop, random flips/transposes and geometric
# jitter, colour jitter, normalisation, then two random-erasing style steps.
_train_steps = [
    albumentations.RandomResizedCrop(256, 256),
    albumentations.Transpose(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    albumentations.ShiftScaleRotate(p=0.5),
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2,
        sat_shift_limit=0.2,
        val_shift_limit=0.2,
        p=0.5,
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1),
        contrast_limit=(-0.1, 0.1),
        p=0.5,
    ),
    _make_normalize(),
    albumentations.CoarseDropout(p=0.5),
    albumentations.Cutout(p=0.5),
]
train_aug = albumentations.Compose(_train_steps, p=1.)

# Validation pipeline: deterministic centre crop, resize and normalisation only.
_valid_steps = [
    albumentations.CenterCrop(256, 256, p=1.),
    albumentations.Resize(256, 256),
    _make_normalize(),
]
valid_aug = albumentations.Compose(_valid_steps, p=1.)

print("hello")

In [None]:

# Build one tez ImageDataset per split from its image paths, targets and
# augmentation pipeline (training first, then validation).
train_dataset, valid_dataset = (
    ImageDataset(image_paths=paths, targets=labels, augmentations=pipeline)
    for paths, labels, pipeline in (
        (train_images, train_targets, train_aug),
        (valid_images, valid_targets, valid_aug),
    )
)

In [None]:
#forward function should return 3 things, if we are using tez.
#multiclass classification problem : loss == crossentropy


class LeafModel(tez.Model):
    """ResNet-18 image classifier wrapped as a ``tez.Model``.

    The torchvision ResNet-18 backbone keeps all of its layers except the final
    fully connected layer, which is replaced by a new ``nn.Linear`` producing
    ``num_classes`` outputs. Training uses cross-entropy loss, the Adam
    optimizer and a per-epoch step learning-rate schedule.

    Args:
        num_classes: number of output classes of the new final layer.
        pretrained: forwarded to ``torchvision.models.resnet18``.
    """

    def __init__(self, num_classes,pretrained = True):
        super().__init__()

        # Imported locally so that defining and instantiating the class does not
        # depend on a later notebook cell having imported torchvision first.
        import torchvision

        self.convnet = torchvision.models.resnet18(pretrained= pretrained)

        # Replace the final fully connected layer with one that outputs
        # num_classes scores. The input width is read from the layer being
        # replaced instead of hard-coding ResNet-18's value.
        self.convnet.fc = nn.Linear(self.convnet.fc.in_features, num_classes)
        # The scheduler returned by fetch_scheduler is meant to advance once per epoch.
        self.step_scheduler_after = "epoch"

    def loss(self, outputs, targets):
        """Return the cross-entropy loss, or None when no targets are given."""
        if targets is None:
            return None
        return nn.CrossEntropyLoss()(outputs,targets)

    def monitor_metrics(self, outputs, targets):
        """Return ``{"accuracy": ...}`` for the batch, or ``{}`` without targets."""
        if targets is None:
            return {}
        # Predicted class = index of the largest score along dim 1.
        outputs = torch.argmax(outputs, dim=1).cpu().detach().numpy()
        targets = targets.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(targets, outputs)
        return {"accuracy": accuracy}

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=3e-4)."""
        opt = torch.optim.Adam(self.parameters(), lr=3e-4)
        return opt

    def fetch_scheduler(self):
        """Return a StepLR schedule that multiplies the lr by 0.7 at every step.

        ``step_size`` is the integer number of scheduler steps between decays.
        The previous ``step_size=0.7`` was a float for which the decay condition
        never became true, so 0.7 is now passed as the decay factor ``gamma``.
        """
        sch = torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=1, gamma=0.7
        )
        return sch

    def forward(self,image,targets =None):
        """Run the network and return ``(outputs, loss, metrics)``.

        When ``targets`` is None the loss and metrics entries are both None.
        """
        outputs = self.convnet(image)
        if targets is not None:
            # calculate loss and metrics for the batch
            loss = self.loss(outputs, targets)
            mon_metrics=self.monitor_metrics(outputs, targets)
            return outputs, loss, mon_metrics
        return outputs, None, None

In [None]:
import torchvision

# One output per distinct label value found in the loaded subset.
model = LeafModel(num_classes = dfx.label.nunique(), pretrained = True)

# Stop when validation accuracy has not improved for 3 epochs. Accuracy is a
# higher-is-better metric, so the mode must be "max"; with "min" the callback
# would treat falling accuracy as an improvement.
es = EarlyStopping(
    monitor="valid_accuracy", model_path="model.bin", patience=3, mode="max"
)
model.fit(
    train_dataset,
    valid_dataset=valid_dataset,
    train_bs=32,
    valid_bs=64,
    device=device,
    epochs=50,
    callbacks=[es],
    # mixed precision relies on CUDA, so only enable it when running on a GPU
    fp16=(device.type == "cuda"),
)
# Save the last-epoch weights under their own name so they do not overwrite the
# best checkpoint that the early-stopping callback writes to "model.bin".
model.save("model_last.bin")

In [None]:
# Display the model object as the cell output.
model

In [None]:
#It was easy :)