In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [None]:
# Root folder of the competition data; later cells append sub-paths to it.
input_path = '/kaggle/input/cassava-leaf-disease-classification/'

# Training metadata table (its `image_id` and `label` columns are used below).
train_csv_path = input_path + "train.csv"
train = pd.read_csv(train_csv_path)
train.head()

In [None]:
# Number of training rows per label value, to see how balanced the classes are.
label_counts = train["label"].value_counts()
label_counts

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation, stratified on `label` so both parts
# keep the same class proportions. A fixed `random_state` makes the split
# reproducible across kernel restarts.
# NOTE: this rebinds `train`, so re-running the cell alone would split the
# already reduced frame again; reload the CSV first if it has to be re-run.
train, val = train_test_split(
    train,
    test_size=0.1,
    stratify=train['label'],
    random_state=42,
)

In [None]:
import torch
import torchvision
import cv2
import time
import copy
from PIL import Image
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T
from torchvision.transforms import functional as TF

In [None]:
class CassavaDataset(Dataset):
    """Map-style dataset that loads the images listed in a metadata table.

    Parameters
    ----------
    root_dir : str
        Directory containing the image files (a trailing separator is optional).
    csv : pandas.DataFrame
        Table with an ``image_id`` column (file names inside ``root_dir``) and
        a ``label`` column.

    Each item is an ``(image, label)`` pair, where ``image`` is the result of
    ``TF.to_tensor`` applied to the RGB-converted picture and ``label`` is the
    corresponding entry of the ``label`` column.
    """

    def __init__(self, root_dir, csv):
        self.root_dir = root_dir
        self.img_list = csv["image_id"].values
        self.labels = csv["label"].values

    def __len__(self):
        """Return the number of rows (samples) in the metadata table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and return ``(image_tensor, label)``."""
        # os.path.join works whether or not root_dir ends with a separator.
        path = os.path.join(self.root_dir, self.img_list[idx])
        # The context manager closes the file handle deterministically.
        # Forcing RGB guarantees three channels, which the 3-value Normalize
        # transforms applied to these tensors require.
        with Image.open(path) as img:
            image = TF.to_tensor(img.convert("RGB"))
        label = self.labels[idx]
        return image, label

In [None]:
# `val` is carved out of the training table, so its image files are in the
# same `train_images/` folder as the training ones; both datasets must read
# from that folder.
train_dataset = CassavaDataset(input_path+"train_images/", train)
val_dataset = CassavaDataset(input_path+"train_images/", val)

In [None]:
# Names of the available preprocessing / augmentation pipelines.
keys = ['orig', 'orig2', 'hflip', 'vflip', 'hvflip']


def _normalize():
    """Build the per-channel normalisation step that ends every pipeline."""
    return T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])


def _flip_pipeline(*flips):
    """Given flip transform(s): flips -> resize to 256 -> random 224 crop -> normalise."""
    return T.Compose([*flips, T.Resize(256), T.RandomCrop(224), _normalize()])


# Pipelines are applied to tensors that have already been converted from images.
transformers = {
    # Resize to 512 and normalise.
    'orig': T.Compose([T.Resize(512), _normalize()]),
    # Resize to 256, centre crop of 224, normalise.
    'orig2': T.Compose([T.Resize(256), T.CenterCrop(224), _normalize()]),
    # Flips use p=1.0, i.e. they are always applied.
    'hflip': _flip_pipeline(T.RandomHorizontalFlip(p=1.0)),
    'vflip': _flip_pipeline(T.RandomVerticalFlip(p=1.0)),
    'hvflip': _flip_pipeline(T.RandomHorizontalFlip(p=1.0),
                             T.RandomVerticalFlip(p=1.0)),
}

In [None]:
def train_epoch(model, criterion, optimizer, dataset, epoch, batch_size=4, num_workers=4):
    """Run one training pass over ``dataset`` and report average loss/accuracy.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; batches are moved to the device of its parameters.
    criterion : callable
        Loss applied as ``criterion(outputs, labels)``.
    optimizer : torch.optim.Optimizer
        Optimizer stepped once per batch.
    dataset : torch.utils.data.Dataset
        Source of ``(image, label)`` pairs.
    epoch : int
        Epoch number, used only in the progress messages.
    batch_size : int, optional
        Mini-batch size for the loader (default 4).
    num_workers : int, optional
        Number of loader worker processes (default 4).

    Returns
    -------
    tuple
        ``(model, epoch_loss, epoch_acc)``; the loss and accuracy are plain
        Python floats averaged over ``len(dataset)``.

    Notes
    -----
    Inputs are preprocessed with the module-level ``transformers['orig']``
    pipeline before being fed to the model.
    """
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                             num_workers=num_workers, pin_memory=True)
    dataset_size = len(dataset)
    # Follow the model's own device rather than hard-coding 'cuda'.
    device = next(model.parameters()).device
    print(f"Epoch#{epoch}. Train")

    model.train()

    running_loss = 0.0    # sum of per-sample losses
    running_corrects = 0  # number of correct predictions (Python int)

    for inputs, labels in tqdm(data_loader):
        inputs = inputs.to(device).type(torch.float)
        labels = labels.to(device)  # move the batch to the model's device
        optimizer.zero_grad()

        # Average the model outputs over the selected pipelines
        # (only 'orig' is used).
        out_list = []
        for key in ['orig']:
            img = transformers[key](inputs)
            out_list.append(model(img))

        outputs = sum(out_list) / len(out_list)
        _, preds = torch.max(outputs, dim=1)
        loss = criterion(outputs, labels)
        loss.backward()   # backpropagate the gradients
        optimizer.step()  # optimizer step
        # loss is passed through .item(), so weight it by the batch size to
        # accumulate a per-sample sum.
        running_loss += loss.item() * inputs.size(0)
        # .item() keeps the counter a Python int instead of a device tensor.
        running_corrects += torch.sum(preds == labels).item()

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects / dataset_size

    print(f'Loss: { epoch_loss } Acc: { epoch_acc }')
    print(f"Epoch#{epoch} (Train) completed. ")
    return model, epoch_loss, epoch_acc

In [None]:
def valid_epoch(model, criterion, optimizer, dataset, epoch, batch_size=4, num_workers=4):
    """Evaluate ``model`` on ``dataset`` without updating its weights.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; batches are moved to the device of its parameters.
    criterion : callable
        Loss applied as ``criterion(outputs, labels)``.
    optimizer : object
        Not used here; kept so the signature mirrors ``train_epoch``.
    dataset : torch.utils.data.Dataset
        Source of ``(image, label)`` pairs.
    epoch : int
        Epoch number, used only in the progress messages.
    batch_size : int, optional
        Mini-batch size for the loader (default 4).
    num_workers : int, optional
        Number of loader worker processes (default 4).

    Returns
    -------
    tuple
        ``(model, epoch_loss, epoch_acc)``; the loss and accuracy are plain
        Python floats averaged over ``len(dataset)``.

    Notes
    -----
    Inputs are preprocessed with the module-level ``transformers['orig']``
    pipeline before being fed to the model.
    """
    # Shuffling brings nothing for evaluation, so the loader keeps dataset order.
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers, pin_memory=True)
    dataset_size = len(dataset)
    # Follow the model's own device rather than hard-coding 'cuda'.
    device = next(model.parameters()).device
    print(f"Epoch#{epoch}. Validation")
    model.eval()

    running_loss = 0.0    # sum of per-sample losses
    running_corrects = 0  # number of correct predictions (Python int)

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader):
            inputs = inputs.to(device).type(torch.float)
            labels = labels.to(device)  # move the batch to the model's device

            # Average the model outputs over the selected pipelines
            # (only 'orig' is used).
            out_list = []
            for key in ['orig']:
                img = transformers[key](inputs)
                out_list.append(model(img))

            outputs = sum(out_list) / len(out_list)
            _, preds = torch.max(outputs, dim=1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            # .item() keeps the counter a Python int instead of a device tensor.
            running_corrects += torch.sum(preds == labels).item()

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects / dataset_size

    print(f'Loss: { epoch_loss } Acc: { epoch_acc }')
    print(f"Epoch#{epoch} (Validation) completed.  ")
    return model, epoch_loss, epoch_acc

In [None]:
# ResNeXt-50 (32x4d) backbone loaded with pretrained weights.
model_ft = torchvision.models.resnext50_32x4d(pretrained=True)
# Replace the final fully connected layer with a fresh one producing 5 outputs.
num_features = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(in_features=num_features, out_features=5)
# Move the network to the GPU.
model_ft = model_ft.to('cuda')

In [None]:
# Cross-entropy loss for the classification head.
criterion = torch.nn.CrossEntropyLoss()
# Adam over every model parameter (backbone and new head) with a small learning rate.
optimizer = torch.optim.Adam(model_ft.parameters(), lr=3e-5)

In [None]:
# Placeholder until the first validation pass; it is replaced by an independent
# snapshot as soon as an epoch improves on `best_acc`.
best_model = model_ft
best_acc = 0.0
best_epoch = 1

num_epochs = 5

train_loss_history = []
train_acc_history = []

val_loss_history = []
val_acc_history = []

# The datasets do not change between epochs, so build them once.
# Both splits read from the same folder of labelled training images.
root_dir = '/kaggle/input/cassava-leaf-disease-classification/train_images/'
train_dataset = CassavaDataset(root_dir=root_dir, csv=train)
val_dataset = CassavaDataset(root_dir=root_dir, csv=val)

for epoch in range(1, num_epochs+1):
    # training
    model_ft, train_loss, train_acc = train_epoch(model_ft, criterion, optimizer, train_dataset, epoch)
    # float(...) stores plain numbers even if a 0-dim tensor is returned.
    train_loss_history.append(float(train_loss))
    train_acc_history.append(float(train_acc))
    # validation
    model_ft, val_loss, val_acc = valid_epoch(model_ft, criterion, optimizer, val_dataset, epoch)
    val_loss = float(val_loss)
    val_acc = float(val_acc)
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    if val_acc > best_acc:
        best_acc = val_acc
        # Deep-copy the model: `best_model = model_ft` would only alias the
        # live network, whose weights keep changing in later epochs, so the
        # "best" model would silently become the last one.
        best_model = copy.deepcopy(model_ft)
        best_epoch = epoch

In [None]:
#saving
output_model_file = 'best_model.bin'
torch.save(best_model, output_model_file)

In [None]:
# The sample submission supplies the image ids to predict for.
sample_submission_path = input_path + "sample_submission.csv"
test = pd.read_csv(sample_submission_path)
test

In [None]:
# Placeholder labels: the dataset class reads a `label` column for every row.
test = test.assign(label=0)

In [None]:
# Test-time loader: fixed order so predictions line up with the rows of `test`.
test_dataset = CassavaDataset(root_dir=input_path+'test_images/', csv=test)
data_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
dataset_size = len(test_dataset)

pred_list = []  # one tensor of predicted class indices per batch
best_model.eval()
with torch.no_grad():
    for inputs, labels in tqdm(data_loader):
        inputs = inputs.to('cuda').type(torch.float)
        labels = labels.to('cuda')  # move the batch to the GPU (cuda)

        # Average the model outputs over the selected pipelines (only 'orig').
        out_list = [best_model(transformers[key](inputs)) for key in ['orig']]
        outputs = sum(out_list) / len(out_list)
        # Second element of torch.max(..., dim=1) is the index of the largest score.
        preds = torch.max(outputs, dim=1)[1]
        pred_list.append(preds)

torch.cat(pred_list)

In [None]:
# Join the per-batch predictions and bring them to the host as a NumPy array.
all_preds = torch.cat(pred_list)
y = all_preds.detach().cpu().numpy()

In [None]:
# Replace the placeholder labels with the predictions and write the submission file.
test = test.assign(label=y)
test.to_csv(path_or_buf="submission_resnext.csv", index=False)
test