In [None]:
pip install tez

In [None]:
!pip install efficientnet-pytorch

In [None]:
pip show tez

In [None]:
import os
import albumentations #for augmentation
import matplotlib.pyplot as plt
import pandas as pd

import tez
from tez.datasets import ImageDataset
from tez.callbacks import EarlyStopping

import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from torch.nn import functional as F

import torchvision #required on using pre trained model

from sklearn import metrics, model_selection

%matplotlib inline 
# `%matplotlib inline` renders each plot directly below the cell that produced it and stores it in the notebook

In [None]:
# Load the competition's training table into a DataFrame.
train_csv_path = "../input/cassava-leaf-disease-classification/train.csv"
dfx = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the training table.
dfx.head()

In [None]:
# Number of training rows per class label.
dfx["label"].value_counts()

* The dataset is imbalanced: class 3 has far more examples than the other classes.

In [None]:
# Hold out 10% of the rows for validation, stratified on the label, with a fixed seed.
df_train, df_valid = model_selection.train_test_split(
    dfx, test_size=0.1, random_state=42, stratify=dfx.label.values
)

# Replace the shuffled row index of each split with a fresh 0..n-1 index.
df_train, df_valid = (
    split.reset_index(drop=True) for split in (df_train, df_valid)
)

* We stratify on the label because the classes do not have equal numbers of examples; stratifying keeps the class proportions the same in the training and validation splits (this is most useful in multi-class classification).
* `reset_index(drop=True)` replaces the shuffled index left over from the split with a fresh 0, 1, 2, ... index; `drop=True` discards the old index instead of adding it as a new column.

In [None]:
# Preview the first rows of the training split.
# (`head` must be called; the bare attribute only displays the bound-method repr.)
df_train.head()

In [None]:
# (rows, columns) of the validation split.
df_valid.shape

In [None]:
# `.values` exposes the column's contents as an array.
df_train["image_id"].values

In [None]:
# Directory that the image_id file names are joined onto when building image paths.
image_path = "../input/cassava-leaf-disease-classification/train_images"

In [None]:
# Build the full path of every image in each split by joining
# the image directory with the file name stored in `image_id`.
train_image_paths = [
    os.path.join(image_path, image_id) for image_id in df_train["image_id"].values
]

valid_image_paths = [
    os.path.join(image_path, image_id) for image_id in df_valid["image_id"].values
]

* Above we used a "list comprehension". Another way of writing the same code is:

```
train_image_paths = []
for x in df_train.image_id.values:
    train_image_paths.append(os.path.join(image_path, x)) 
    
```    

In [None]:
# Sanity-check the first five generated paths.
train_image_paths[:5]

In [None]:
# Class labels for each split, in the same row order as the image path lists.
train_targets = df_train["label"].values
valid_targets = df_valid["label"].values

* We could proceed the standard PyTorch way by first defining our own `Dataset` class, but here we use the ready-made `ImageDataset` from the tez library instead.

In [None]:
# Dataset over the training images with no augmentation, used to inspect the raw samples.
train_dataset = ImageDataset(
    image_paths=train_image_paths, targets=train_targets, augmentations=None
)

In [None]:
# Hyper-parameter constants, copied from the newer example in the tez repo.
# NOTE(review): the model.fit call further down passes its own literal batch sizes
# and epoch count rather than these constants - confirm which values are intended.
TRAIN_BATCH_SIZE = 256
VALID_BATCH_SIZE = 256
EPOCHS = 20
IMAGE_SIZE = 256

* Indexing the dataset created earlier (e.g. `train_dataset[0]`) returns a dictionary with "image" and "targets" as keys.

In [None]:
def plot_img(img_dict):
    """Print a sample's target and display its image.

    Args:
        img_dict: mapping with an "image" entry (a tensor that is permuted
            with (1, 2, 0) before plotting) and a "targets" entry.
    """
    tensor = img_dict["image"]
    label = img_dict["targets"]
    print(label)

    plt.figure(figsize=(5, 5))
    # Move the first axis last and divide by 255 before handing the image to imshow.
    plt.imshow(tensor.permute(1, 2, 0) / 255)
    
    

 `.permute()` returns a view of the original tensor with its dimensions reordered as requested; here `(1, 2, 0)` moves the first (channel) axis to the end, which is the layout `plt.imshow` expects.

In [None]:
# Show sample 45 of the current train_dataset.
plot_img(train_dataset[45])

In [None]:
# tez's ImageDataset expects albumentations transforms.
# The crop size is the image side length (IMAGE_SIZE), not the batch size:
# the two constants only happen to share the value 256.
train_aug = albumentations.Compose(
    [
        # random crop, resized to IMAGE_SIZE x IMAGE_SIZE
        albumentations.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE),
        # swap rows and columns
        albumentations.Transpose(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.CoarseDropout(p=0.5),
    ]
)

# Re-create the training dataset, this time with the augmentations attached.
train_dataset = ImageDataset(
    image_paths=train_image_paths,
    targets=train_targets,
    augmentations=train_aug,
)

In [None]:
# Validation pipeline. Sizes are expressed with IMAGE_SIZE (the image side length),
# not the batch-size constant that merely shared the same value.
# NOTE(review): the random transpose/flips make validation samples differ between
# epochs, which adds noise to the valid_loss that early stopping monitors - confirm intended.
valid_aug = albumentations.Compose(
    [
        albumentations.CenterCrop(IMAGE_SIZE, IMAGE_SIZE, p=1.0),
        albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE),
        albumentations.Transpose(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
    ]
)

valid_dataset = ImageDataset(
    image_paths=valid_image_paths,
    targets=valid_targets,
    augmentations=valid_aug,
)

In [None]:
# Show sample 45 again, now drawn from the augmented train_dataset.
plot_img(train_dataset[45])

In [None]:
class LeafModel(tez.Model):
    """EfficientNet-B3 feature extractor followed by dropout and a linear classifier.

    Args:
        num_classes: number of output classes of the final linear layer.
        pretrained: when True (default) the backbone is built with
            ``EfficientNet.from_pretrained``; when False it is built with
            ``EfficientNet.from_name``, i.e. without loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        backbone_name = "efficientnet-b3"
        # Honour the `pretrained` flag instead of always loading pretrained weights.
        if pretrained:
            self.effnet = EfficientNet.from_pretrained(backbone_name)
        else:
            self.effnet = EfficientNet.from_name(backbone_name)
        self.dropout = nn.Dropout(0.1)
        # 1536 input features -> num_classes outputs; 1536 has to match the number
        # of channels coming out of the backbone's extract_features.
        self.out = nn.Linear(1536, num_classes)
        # the learning-rate scheduler is stepped once per epoch
        self.step_scheduler_after = "epoch"

    def loss(self, outputs, targets):
        """Return the cross-entropy loss, or None when no targets are given."""
        if targets is None:
            return None
        # cross entropy because this is multi-class classification
        return nn.CrossEntropyLoss()(outputs, targets)

    def monitor_metrics(self, outputs, targets):
        """Return {"accuracy": ...} for the batch, or {} when no targets are given."""
        if targets is None:
            return {}
        # predicted class = index of the largest output in each row
        predictions = torch.argmax(outputs, dim=1).cpu().detach().numpy()
        targets = targets.cpu().detach().numpy()
        acc = metrics.accuracy_score(targets, predictions)
        return {"accuracy": acc}

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=3e-4)."""
        opt = torch.optim.Adam(self.parameters(), lr=3e-4)
        return opt

    def fetch_scheduler(self):
        """Return a cosine-annealing-with-warm-restarts scheduler on self.optimizer."""
        sch = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=1, eta_min=1e-6, last_epoch=-1
        )
        return sch

    def forward(self, image, targets=None):
        """Run the network on a batch of images.

        Returns:
            A 3-tuple ``(outputs, loss, metrics)``. ``loss`` and ``metrics`` are
            None when ``targets`` is None. tez expects forward to return three values.
        """
        batch_size = image.shape[0]

        # backbone features -> global average pool -> flatten to (batch_size, -1)
        x = self.effnet.extract_features(image)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)

        outputs = self.out(self.dropout(x))

        if targets is not None:
            loss = self.loss(outputs, targets)
            mon_metrics = self.monitor_metrics(outputs, targets)
            return outputs, loss, mon_metrics

        # no targets: only the outputs are meaningful
        return outputs, None, None
        

In [None]:
# One output unit per distinct label value in the training table.
model = LeafModel(
    num_classes=dfx["label"].nunique(),
    pretrained=True,
)

* Now we look at our model

In [None]:
# Display the model's module structure.
model

In [None]:
# Early stopping halts training once the monitored validation loss
# has stopped improving, so we do not keep running further epochs.
es = EarlyStopping(
    monitor="valid_loss",
    model_path="model.bin",
    patience=3,
    mode="min",
)

model.fit(
    train_dataset,
    valid_dataset=valid_dataset,
    train_bs=32,
    valid_bs=64,
    epochs=10,
    device="cuda",
    fp16=True,
    callbacks=[es],
)

In [None]:
# Save the final-epoch weights under their own name. The EarlyStopping callback is
# configured with model_path="model.bin"; saving the last-epoch state to that same
# file would overwrite the checkpoint the callback wrote there.
model.save("model_last.bin")