In [None]:
import time
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.optim import lr_scheduler
import torchvision , torchvision.models as mdl
import os , cv2
from tqdm import tqdm_notebook as tqdm
import pandas as pd
from PIL import Image, ImageFile
from torch.utils.data import Dataset , DataLoader
import torch
import numpy as np


import albumentations as A
# Use the first GPU when one is visible; otherwise fall back to the CPU so that
# tensors/modules moved with `.to(device)` still work on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Training labels table; later cells read its `image_id` and `label` columns.
dataf = pd.read_csv(
    filepath_or_buffer="../input/cassava-leaf-disease-classification/train.csv"
)

In [None]:
# Hold out the first 100 rows of each label (0-4) for evaluation. The per-class
# slices are collected in a list and concatenated once, instead of growing a
# frame from an empty DataFrame inside the loop.
per_class_eval = [dataf[dataf["label"] == label].head(100) for label in range(5)]
eval_data = pd.concat(per_class_eval, sort=False)

# Everything that was not held out is used for training.
train_data = dataf.drop(eval_data.index).reset_index(drop=True)
eval_data = eval_data.reset_index(drop=True)

# Undersample label 3 by dropping 9000 of its training rows. A fixed
# random_state makes the dropped subset (and therefore the training set)
# identical on every Restart & Run All.
class_3_idx = train_data[train_data["label"] == 3].sample(9000, random_state=42).index
train_data = train_data.drop(class_3_idx).reset_index(drop=True)
print(train_data.shape, eval_data.shape)
train_data["label"].value_counts()

In [None]:
# Side length (pixels) of the square image produced by the final Resize.
image_size = 256

# At most one pixel-level degradation: median blur or Gaussian noise.
blur_or_noise = A.OneOf(
    [
        A.MedianBlur(blur_limit=3),
        # A.GaussianBlur(blur_limit=3),  # disabled
        A.GaussNoise(var_limit=(5.0, 30.0)),
    ],
    p=0.6,
)

# At most one geometric warp.
geometric_warp = A.OneOf(
    [
        A.OpticalDistortion(distort_limit=0.7),
        A.GridDistortion(num_steps=2, distort_limit=0.2),
        A.ElasticTransform(alpha=3),
    ],
    p=0.7,
)

# Cutout hole sizes are derived from `image_size`, but both Cutout steps run
# before the final Resize, i.e. on the 500x500 crop.
large_hole = int(image_size * 0.2)
small_hole = int(image_size * 0.1)

# Transforms are applied in list order: crop -> flips -> photometric/noise ->
# geometric -> colour -> affine -> cutout -> resize to the model input size.
train_transforms = [
    A.RandomCrop(height=500, width=500),
    A.Transpose(p=0.3),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomContrast(limit=0.05, p=0.5),
    blur_or_noise,
    geometric_warp,
    A.CLAHE(clip_limit=4.0, p=0.7),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.8),
    A.Cutout(max_h_size=large_hole, max_w_size=large_hole, num_holes=1, p=0.5),
    A.Cutout(max_h_size=small_hole, max_w_size=small_hole, num_holes=3, p=0.5),
    A.Resize(image_size, image_size),
]
selected_aug = A.Compose(train_transforms)

In [None]:
class Cassava_dataset(Dataset):
    """Map-style dataset yielding augmented leaf images together with their labels.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an ``image_id`` column (file name inside ``img_dir``) and
        a ``label`` column holding an integer class id.
    transfrm : callable
        Called as ``transfrm(image=<RGB ndarray>)``; must return a mapping whose
        ``"image"`` entry is the transformed image (albumentations convention).
    img_dir : str, optional
        Directory containing the image files. Defaults to the competition's
        training image folder.
    """

    def __init__(self, dataframe, transfrm,
                 img_dir="../input/cassava-leaf-disease-classification/train_images"):

        self.data = dataframe
        self.transform = transfrm
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, augment and tensorise the sample at position ``idx``.

        Returns a dict with ``'image'`` (output of ``transforms.ToTensor``) and
        ``'label'`` (0-d integer tensor).

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the image file.
        """
        # Positional lookup: samplers hand out 0..len-1, which only coincides
        # with label-based `.loc` when the frame has a fresh RangeIndex.
        row = self.data.iloc[idx]
        img_name = os.path.join(self.img_dir, row['image_id'])

        # cv2.imread signals failure by returning None instead of raising.
        im_bgr = cv2.imread(img_name)
        if im_bgr is None:
            raise FileNotFoundError("could not read image: {}".format(img_name))

        # cvtColor yields a contiguous RGB array (a [:, :, ::-1] slice is a
        # negative-stride view of the BGR buffer).
        im_rgb = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
        image = self.transform(image=im_rgb)
        label = torch.tensor(int(row['label']))
        return {'image': transforms.ToTensor()(image["image"]),
                'label': label
                }

In [None]:
class MY_RESNet34(nn.Module):
    """torchvision ResNet-34 (built with ``pretrained=False``) plus a 5-way linear head.

    The backbone's first convolution is swapped for a 3x3, stride-2, bias-free
    convolution taking 3 input channels. The backbone's own output layer is kept,
    and ``fc1`` maps its 1000 outputs to the 5 classes.
    """

    def __init__(self):
        super().__init__()
        backbone = mdl.resnet34(pretrained=False)
        # Replace the stem convolution in place; it keeps its slot in the backbone.
        backbone.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, bias=False)
        self.model = backbone

        self.fc1 = nn.Linear(1000, 5)

    def forward(self, x):
        """Run the backbone on ``x`` and project its output to 5 class logits."""
        backbone_out = self.model(x)
        return self.fc1(backbone_out)


In [None]:
# Training hyper-parameters.
batch_size = 32
epochs = 40

# Network on the target device, classification loss, and Adam over all parameters.
model = MY_RESNet34().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=4e-4)
#scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-4, max_lr=0.05)

In [None]:
# Train for `epochs` epochs, recording per-epoch training loss/accuracy and
# periodically scoring the model on the held-out `eval_data` frame.
losses = []  # mean training loss per epoch (Python floats)
accs = []    # mean per-batch training accuracy per epoch, as a fraction in [0, 1]

# The dataset and loader are identical for every epoch, so build them once;
# shuffle=True still draws a new order each time the loader is iterated.
train_image = Cassava_dataset(train_data, selected_aug)
train_loader = DataLoader(train_image, batch_size=batch_size, shuffle=True)

# Epochs (0-based) after which the held-out set is evaluated.
eval_epochs = {0, 1, 2, 3, 5, 10, 20, 29, 35, 39}
eval_image_dir = "../input/cassava-leaf-disease-classification/train_images/"

for epoch in range(epochs):
    print('epochs {}/{} '.format(epoch, epochs))
    running_loss = 0.0
    running_acc = 0.0
    for idx, data_t in enumerate(train_loader):
        if idx % 50 == 0:
            print(epoch, idx)
        input_img = data_t["image"].to(device)
        label = data_t["label"].to(device)

        optimizer.zero_grad()
        output = model(input_img)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # .item() converts to plain floats so the running totals do not keep
        # autograd history (or device memory) alive across batches.
        running_loss += loss.item()
        running_acc += (output.argmax(1) == label).float().mean().item()
    #scheduler.step()

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = running_acc / len(train_loader)
    losses.append(epoch_loss)
    accs.append(epoch_acc)
    # epoch_acc is a fraction; scale it so the value matches the '%' sign.
    print('acc : {:.2f}%'.format(100 * epoch_acc))
    print('loss : {:.4f}'.format(epoch_loss))

    if epoch in eval_epochs:
        model.eval()
        epoch_preds = []
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            for idd in eval_data.index:
                img_name = eval_data.loc[idd, "image_id"]
                # Context manager closes the file handle once the image is resized.
                with Image.open(eval_image_dir + img_name) as img:
                    image = img.convert("RGB").resize((256, 256), resample=Image.BILINEAR)
                # Add the batch dimension: (3, 256, 256) -> (1, 3, 256, 256).
                image = transforms.ToTensor()(image).to(device).unsqueeze(0)
                epoch_preds.append(int(model(image).argmax().item()))
        eval_data["pred"] = epoch_preds

        actual_labels = eval_data["label"].values
        predictions = eval_data["pred"].values.astype(int)
        # Mean of the match mask, so the score stays correct for any hold-out size.
        print("accuracy  =", (actual_labels == predictions).mean())

        # Confusion matrix: rows are true labels, columns are predicted labels.
        cmt = torch.zeros(5, 5, dtype=torch.int32)
        for actual, predicted in zip(actual_labels, predictions):
            cmt[actual, predicted] += 1
        print(cmt)
        model.train()

In [None]:
# Predict a label for every image listed in the sample submission and write
# the result to submission.csv.
submission = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")

model.eval()
test_preds = []
# Inference only: skip gradient tracking.
with torch.no_grad():
    for idd in submission.index:
        img_name = submission.loc[idd, "image_id"]
        img_path = "../input/cassava-leaf-disease-classification/test_images/" + img_name
        # Context manager closes the file handle once the image is resized.
        with Image.open(img_path) as img:
            image = img.convert("RGB").resize((256, 256), resample=Image.BILINEAR)
        # Add the batch dimension: (3, 256, 256) -> (1, 3, 256, 256).
        image = transforms.ToTensor()(image).to(device).unsqueeze(0)
        test_preds.append(int(model(image).argmax().item()))

# Assign all predictions at once and cast the column a single time, rather
# than re-casting the whole column on every loop iteration.
submission["label"] = test_preds
submission["label"] = submission["label"].astype(int)
submission.to_csv("submission.csv", index=False)
submission.shape