In [None]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

#import timm

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom
#from efficientnet_pytorch import EfficientNet
from scipy.ndimage.interpolation import zoom
from PIL import Image
import albumentations
import gc

In [None]:
# Read train.csv (its image_id and label columns are used further down) and
# show the table's (rows, columns) as the cell output.
train_csv = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
train_csv.shape

In [None]:
# Keep only the first 1000 rows (presumably to keep experiments fast -- TODO confirm).
train_csv = train_csv[:1000]

In [None]:
# Central run configuration: fold count, seed, model/input settings, batch sizes,
# optimiser hyper-parameters, loader workers and target device.
CFG = {
    'fold_num': 5,
    'seed': 719,
    'model_arch': 'tf_efficientnet_b4_ns',
    'img_size': 512,
    'epochs': 10,
    'train_bs': 16,
    'valid_bs': 32,
    'T_0': 10,
    'lr': 1e-4,
    'min_lr': 1e-6,
    'weight_decay':1e-6,
    'num_workers': 4,
    'accum_iter': 2, # support batch accumulation for backprop, giving an effectively larger batch size
    'verbose_step': 1,
    'device': 'cuda:0'
}

# **Helper functions**

In [None]:
def seed_everything(seed=None):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and switches cuDNN to
    deterministic mode.

    Args:
        seed: Integer seed. When omitted, ``CFG['seed']`` is used.
    """
    if seed is None:
        seed = CFG['seed']
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode picks convolution algorithms by timing them at runtime,
    # which can differ between runs; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False
    
    
def get_img(path):
    """Load an image file and return it as an RGB array.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The decoded image with its channels reordered from OpenCV's BGR
        layout to RGB.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` (missing or
            undecodable file).
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # cv2.imread signals failure with None instead of raising.
        raise FileNotFoundError(f"could not read image: {path}")
    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)

In [None]:
# cv2.imread decodes to BGR channel order, while matplotlib interprets arrays as RGB.
# Convert for display only; `img` itself keeps the array exactly as OpenCV returned it.
img = cv2.imread("../input/cassava-leaf-disease-classification/train_images/1000201771.jpg")
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB));

In [None]:
# Open the same sample file with PIL and render it as the cell output.
img2 = Image.open(
    "../input/cassava-leaf-disease-classification/train_images/1000201771.jpg"
)
img2

In [None]:
# torchvision pipeline with a single step: resize to 256 x 256.
transform = transforms.Compose([transforms.Resize((256,256))])


In [None]:
# Run the PIL sample image through the resize pipeline.
im = transform(img2)

In [None]:
# Display the resized sample.
im

In [None]:
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Label column of the training table.
target = train_csv.label

In [None]:
# One-hot encode the labels: one indicator column per distinct label value present.
target = pd.get_dummies(train_csv.label)

In [None]:
# View the one-hot table as a NumPy array.
np.array(target)

In [None]:
# NOTE(review): the frame was already cut to its first 1000 rows earlier in the
# notebook, so this repeat selects the same rows again.
train_csv = train_csv[:1000] 

In [None]:
# Directory holding the training images (note the trailing slash).
image_path = "../input/cassava-leaf-disease-classification/train_images/"

In [None]:
from sklearn import model_selection
from sklearn import ensemble

In [None]:
# Shuffle once with a fixed seed so fold membership is reproducible across runs.
train_csv = train_csv.sample(frac=1, random_state=CFG['seed']).reset_index(drop=True)

# Add the column after shuffling: sample()/reset_index() return a new frame, so this
# is a plain column insert rather than an assignment on a slice of another frame.
train_csv["kfold"] = -1
y = train_csv.label

kf = model_selection.StratifiedKFold(n_splits=CFG['fold_num'])

# Tag each row with the index of the fold in which it serves as validation data.
for f, (t_, v_) in enumerate(kf.split(X=train_csv, y=y)):
    train_csv.loc[v_, 'kfold'] = f

In [None]:
# Inspect the frame, including the kfold column.
train_csv

In [None]:
# Re-open the sample with PIL; this rebinds `img`, which previously held the OpenCV array.
img = Image.open("../input/cassava-leaf-disease-classification/train_images/1000201771.jpg")

In [None]:
# Convert the PIL image to a NumPy array.
img = np.array(img)

In [None]:
# Reorder axes to (2, 0, 1) and cast to float32 (presumably H x W x C -> C x H x W).
# NOTE(review): rebinding `img` makes this cell non-idempotent; re-running it permutes the axes again.
img = np.transpose(img, (2,0,1)).astype(np.float32)

In [None]:
# Check the shape after the axis reorder.
img.shape

In [None]:
import torch
import torchvision

class ClassificationDataset(torch.utils.data.Dataset):
    """Map-style dataset that loads images from disk and pairs them with targets.

    Args:
        image_path: Sequence of image file paths.
        targets: Sequence of targets, indexed in parallel with ``image_path``.
        augmentations: Optional callable invoked as ``augmentations(image=array)``
            and expected to return a mapping with an ``"image"`` entry (the
            albumentations calling convention). Receives the NumPy array.
        transforms: Optional callable invoked as ``transforms(pil_image)`` and
            expected to return the transformed image (the torchvision calling
            convention). Runs on the PIL image before the array conversion.
    """

    def __init__(self, image_path, targets, augmentations=None, transforms=None):
        self.image_path = image_path
        self.targets = targets
        self.augmentations = augmentations
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_path)

    def __getitem__(self, item):
        """Load sample ``item``.

        Returns:
            A dict with ``"image"`` (float tensor, channels first) and
            ``"target"`` (long tensor built from ``targets[item]``).
        """
        # The context manager closes the file handle; convert("RGB") forces three
        # channels so the (2, 0, 1) transpose below is valid for grayscale or
        # RGBA files as well.
        with Image.open(self.image_path[item]) as handle:
            image = handle.convert("RGB")

        targets = self.targets[item]

        if self.transforms is not None:
            # Positional call: torchvision pipelines do not accept `image=` and
            # return the image itself, not a dict.
            image = self.transforms(image)

        image = np.array(image)

        if self.augmentations is not None:
            image = self.augmentations(image=image)["image"]

        # H x W x C -> C x H x W, the layout the convolutional model consumes.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "target": torch.tensor(targets, dtype=torch.long),
        }

In [None]:
# Absolute-ish paths for every image listed in the training table.
data_path = "../input/cassava-leaf-disease-classification/train_images/"
images = [os.path.join(data_path, i) for i in train_csv.image_id.values.tolist()]

# One-hot targets: one indicator column per distinct label value.
targets = pd.get_dummies(train_csv.label).values

# ImageNet channel statistics expected by the pretrained backbone.
# (The green-channel std is 0.224; the previous 0.229 was a typo.)
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

aug = albumentations.Compose(
    [
        # Resize first so normalisation runs on the small 256 x 256 image
        # instead of the full-resolution one.
        albumentations.Resize(height=256, width=256, always_apply=True),
        albumentations.Normalize(
            mean, std, max_pixel_value=255.0, always_apply=True
        ),
    ],
    p=1.0,
)

# torchvision resize pipeline (not passed to the dataset below).
transform = transforms.Compose([transforms.Resize((256, 256))])

train_dataset = ClassificationDataset(
    image_path=images,
    targets=targets,
    augmentations=aug,
)


In [None]:
# Shuffled mini-batches of 10 samples, loaded by 4 worker processes.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=10,
    shuffle=True,
    num_workers=4,
)

In [None]:
import torch.nn as nn

def train(data_loader, model, optimizer, device, loss_output=0):
    """Run one training pass over ``data_loader`` and return the summed loss.

    Args:
        data_loader: Iterable of batches; each batch is a mapping with an
            ``"image"`` tensor and a ``"target"`` tensor.
        model: Module to optimise; switched to training mode.
        optimizer: Optimiser stepped once per batch.
        device: Device the batch tensors are moved to.
        loss_output: Starting value that the per-batch losses are added to.

    Returns:
        ``loss_output`` plus the sum of every batch's BCE-with-logits loss,
        as a Python float.
    """
    model.train()

    # Stateless criterion: build it once instead of once per batch.
    criterion = nn.BCEWithLogitsLoss()
    running_loss = float(loss_output)

    for data in data_loader:
        inputs = data["image"].to(device, dtype=torch.float)
        # BCEWithLogitsLoss needs float targets of the same shape as the logits.
        targets = data["target"].to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # .item() yields a plain float; summing the tensors themselves would
        # chain every batch's autograd history onto the running total.
        running_loss += loss.item()

    return running_loss
        
def evaluate(data_loader, model, device):
    """Collect model outputs and targets over ``data_loader`` without gradients.

    Args:
        data_loader: Iterable of batches; each batch is a mapping with an
            ``"image"`` tensor and a ``"target"`` tensor.
        model: Module to evaluate; switched to eval mode.
        device: Device the input tensors are moved to.

    Returns:
        ``(final_output, final_target)``: two lists of per-sample rows. Outputs
        are the raw values returned by ``model`` (no activation is applied);
        targets are floats, with 1-D targets expanded to one value per row.
    """
    model.eval()

    final_target = []
    final_output = []

    with torch.no_grad():
        for data in data_loader:
            inputs = data["image"].to(device, dtype=torch.float)
            targets = data["target"].to(dtype=torch.float)

            # Only 1-D targets are reshaped to a column. Flattening (batch, classes)
            # targets would produce batch * classes rows and break the row-for-row
            # pairing with the outputs.
            if targets.dim() == 1:
                targets = targets.view(-1, 1)

            output = model(inputs)

            final_target.extend(targets.detach().cpu().numpy().tolist())
            final_output.extend(output.detach().cpu().numpy().tolist())

    return final_output, final_target

In [None]:
pip install pretrainedmodels

In [None]:
import pretrainedmodels

def get_model(pretrained):
    """Build a ResNet-18 with a replacement classification head.

    Args:
        pretrained: When truthy the backbone is created with
            ``pretrained="imagenet"``; otherwise with ``pretrained=None``.

    Returns:
        The model with ``last_linear`` replaced by a
        BatchNorm -> Dropout -> Linear(512, 512) -> ReLU -> BatchNorm ->
        Dropout -> Linear(512, 5) stack.
    """
    # The keyword was previously misspelled as `pretrined` on the
    # non-pretrained path, which raised a TypeError.
    weights = "imagenet" if pretrained else None
    model = pretrainedmodels.__dict__["resnet18"](pretrained=weights)

    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(512),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=512, out_features=512),
        nn.ReLU(),
        nn.BatchNorm1d(512),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=512, out_features=5),
    )
    return model

In [None]:
# Build the ImageNet-pretrained network and place it on the GPU.
device = "cuda"
model = get_model(pretrained=True).to(device)

In [None]:
NUM_CLASSES = 5   # matches the 5-unit output layer built by get_model
BATCH_SIZE = 10
epochs = CFG['epochs']
data_path = "../input/cassava-leaf-disease-classification/train_images/"


def _make_loader(df, shuffle):
    """Build a DataLoader over the rows of ``df`` (needs image_id and label columns)."""
    images = [os.path.join(data_path, i) for i in df.image_id.values.tolist()]
    # Index a fixed identity matrix so every split yields NUM_CLASSES columns;
    # pd.get_dummies would silently drop a class that is absent from the split.
    # Assumes labels are integers in [0, NUM_CLASSES) -- TODO confirm.
    targets = np.eye(NUM_CLASSES, dtype=np.int64)[df.label.values]
    dataset = ClassificationDataset(
        image_path=images,
        targets=targets,
        augmentations=aug,
    )
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        pin_memory=False,
        shuffle=shuffle,
        num_workers=4,
    )


for fold in range(CFG['fold_num']):
    # The split and the loaders depend only on the fold, so build them once per
    # fold rather than once per epoch.
    train_df = train_csv[train_csv.kfold != fold].reset_index(drop=True)
    test_df = train_csv[train_csv.kfold == fold].reset_index(drop=True)

    train_loader = _make_loader(train_df, shuffle=True)
    # Validation order does not matter; keep it fixed.
    test_loader = _make_loader(test_df, shuffle=False)

    # Start every fold from fresh weights: a model carried over from the previous
    # fold has already been trained on this fold's validation rows.
    model = get_model(pretrained=True).to(device)

    # One optimiser per fold so Adam's moment estimates persist across epochs.
    # Learning rate and weight decay come from CFG instead of a hard-coded 5e-2.
    optimizer = torch.optim.Adam(
        model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay']
    )

    for epoch in range(epochs):
        loss = train(train_loader, model, optimizer, device=device, loss_output=0)
        predictions, valid_targets = evaluate(test_loader, model, device=device)

        # train() returns the summed batch loss; report the per-batch mean.
        mean_loss = float(loss) / max(len(train_loader), 1)
        print(
            f"epoch = {epoch}",
            f"folds = {fold}",
            f" loss = {mean_loss:.4f}"
        )

    gc.collect()

In [None]:
torch.sav