In [None]:
# Optional dependency, currently disabled (the EfficientNet import further down is commented out too).
#!pip install efficientnet-pytorch
# NOTE(review): presumably removes the PyPI `dataclasses` backport because it conflicts with
# the standard-library module on this runtime — confirm before running in another environment.
!pip uninstall dataclasses -y

In [None]:
import pandas as pd 
import tqdm as tqdm 
from functools import partial 
import numpy as np 
import os 
import torch 
import torch.nn as nn 
import torch.optim as optim 
from torch.optim import lr_scheduler
import torch.nn.functional as F 
from torch.utils.data import Dataset, DataLoader 
from torch.utils.data import random_split 
import torchvision 
from torchvision import datasets, models, transforms 
import time 
import copy
import matplotlib.pyplot as plt 
from PIL import Image

from ray import tune 
from ray.tune import CLIReporter 
from ray.tune.schedulers import ASHAScheduler 
from sklearn.model_selection import train_test_split 

#from efficientnet_pytorch import EfficientNet


#from ipynb.fs.full.train_baseline import MyDataset  # to access functions and class from jupyter notebook
# import nbimporter # to import class and methods of other jupyter notebook 
# from train_baseline import MyDataset
# from train_baseline import set_transform_mode
# from train_baseline import train_model

from torch.optim import Adam, SGD
import albumentations as A

In [None]:
def set_transform_mode(mode=None):
    """Build the torchvision preprocessing pipeline for a dataset split.

    Args:
        mode: ``"train"`` selects the augmenting pipeline (random resized crop
            plus horizontal flip); ``"val"`` and ``"test"`` select the
            deterministic resize + center-crop pipeline.

    Returns:
        A ``transforms.Compose`` that converts a PIL image to a normalized tensor.

    Raises:
        ValueError: if ``mode`` is not ``"train"``, ``"val"`` or ``"test"``.
    """
    # Per-channel mean / std handed to Normalize in both pipelines.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    if mode == "train":
        return transforms.Compose([
            transforms.RandomResizedCrop(299),  # changed from 224 to 299
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

    if mode in ("val", "test"):
        # No separate test pipeline exists; test images get the same
        # deterministic preprocessing as validation images instead of falling
        # through to an unbound `transform`.
        return transforms.Compose([
            transforms.Resize(299),  # changed from 256 to 299
            transforms.CenterCrop(254),  # changed from 224 to 254
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

    raise ValueError(
        "mode must be 'train', 'val' or 'test', got {!r}".format(mode))

In [None]:
# Create custom dataset with tensor for each image and label for each image
# source: https://stackoverflow.com/questions/61391919/loading-image-data-from-pandas-to-pytorch 

class MyDataset(Dataset):
    """Map-style dataset that loads the images listed in a DataFrame.

    Every row of ``dataframe`` must provide a ``"path"`` entry (image file
    location) and a ``"label"`` entry; those are the two keys read when an
    item is fetched.

    Args:
        dataframe: table with one row per image.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        row = self.dataframe.iloc[index]
        label = row["label"]

        # Image.open is lazy and keeps the file handle open. Converting inside
        # the context manager forces the pixel data to be read and releases the
        # handle, and guarantees three channels for the 3-channel Normalize
        # even when a file is stored as grayscale, palette or RGBA.
        with Image.open(row["path"]) as source:
            img = source.convert("RGB")

        if self.transform:
            img = self.transform(img)

        return (img, label)

In [None]:
# Wrap a data split in a dataset that applies the transform for its mode

def load_data(data=None, mode=None):
    """Wrap a DataFrame split in a ``MyDataset`` with the matching transform.

    Args:
        data: table of images; passed unchanged to ``MyDataset``.
        mode: ``"train"``, ``"val"`` or ``"test"``.

    Returns:
        A ``MyDataset`` over ``data`` using the transform selected by ``mode``.

    Raises:
        ValueError: if ``mode`` is not one of the supported split names.
    """
    if mode not in ("train", "val", "test"):
        raise ValueError(
            "mode must be 'train', 'val' or 'test', got {!r}".format(mode))

    # The test branch used to reference a `val_transform` that was never
    # defined in that branch; request the validation pipeline explicitly so
    # test data gets the deterministic preprocessing that branch intended.
    transform_mode = "val" if mode == "test" else mode
    return MyDataset(data, set_transform_mode(transform_mode))

In [None]:
def train_model(config, model, traindata, testdata, criterion, num_epochs=25, checkpoint_dir=None):
    """Train ``model`` for one Ray Tune trial, checkpointing and reporting every epoch.

    Args:
        config: mapping with the sampled hyperparameters ``"lr"``,
            ``"weight_decay"`` and ``"batch_size"``.
        model: network to optimize; wrapped in ``nn.DataParallel`` when more
            than one CUDA device is visible.
        traindata: dataset iterated (shuffled) during the training phase.
        testdata: dataset iterated during the validation phase.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: number of train + validation passes.
        checkpoint_dir: directory containing a ``"checkpoint"`` file to resume
            from; passed by Ray Tune when a trial is restored.

    Returns:
        The model, loaded with the weights of the epoch that reached the best
        validation accuracy.
    """
    since = time.time()

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    model.to(device)

    # Optimize the model that was passed in (not a notebook-level global), so
    # the function trains whatever network the caller hands over.
    optimizer = Adam(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"], amsgrad=False)

    # Create dataloaders for the train and validation sets; validation order
    # does not need to be shuffled.
    batch_size = int(config["batch_size"])
    dataloaders = {
        "train": DataLoader(traindata, batch_size=batch_size, shuffle=True, num_workers=4),
        "val": DataLoader(testdata, batch_size=batch_size, shuffle=False, num_workers=4),
    }
    dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

    # The "checkpoint_dir" parameter gets passed by Ray Tune when a checkpoint should be restored
    # Reference: https://docs.ray.io/en/master/tune/tutorials/tune-pytorch-cifar.html
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        # Each state dict goes to its own object (the model used to receive
        # the optimizer's state here).
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # Snapshot after a possible restore so the fallback weights are the
    # restored ones rather than the pre-restore ones.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Validation counters reported to Ray Tune, referenced from here:
            # https://docs.ray.io/en/master/tune/tutorials/tune-pytorch-cifar.html
            if phase == "val":
                val_loss = 0.0
                val_steps = 0
                total = 0
                correct = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    if phase == "val":
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                        val_loss += loss.item()
                        val_steps += 1

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            # Here we save a checkpoint. It is automatically registered with
            # Ray Tune and will potentially be passed as the `checkpoint_dir`
            # parameter in future iterations.
            if phase == "val":
                # Separate name so the `checkpoint_dir` argument is not shadowed.
                with tune.checkpoint_dir(step=epoch) as save_dir:
                    path = os.path.join(save_dir, "checkpoint")
                    torch.save(
                        (model.state_dict(), optimizer.state_dict()), path)

                tune.report(loss=(val_loss / val_steps), accuracy=correct / total)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    print("Finished Training")
    return model


In [None]:
# Location of the competition data
# in Kaggle:
dataset_dir = "/kaggle/input/cassava-leaf-disease-classification/"
# Outside of Kaggle
#dataset_dir = "/home/data2/yan/cassava/"

# Read the label table, attach the file path of every image, and keep the
# columns in the order image_id, path, label.
data_df = (
    pd.read_csv(dataset_dir + "train.csv")
    .assign(path=lambda frame: dataset_dir + "train_images/" + frame["image_id"])
    .loc[:, ["image_id", "path", "label"]]
)
# data_df = data_df[:500] # for test purpose

In [None]:
# Split the data into train and validation sets.
# A fixed seed keeps the split identical across kernel restarts, so validation
# numbers from different runs are measured on the same images.
SPLIT_SEED = 42
train, val = train_test_split(data_df, test_size=0.15, random_state=SPLIT_SEED)

# create dataset with tensors and their belonging labels for each image
train_data = load_data(data=train, mode="train")
val_data = load_data(data=val, mode="val")

In [None]:
# Load the pretrained model and fine-tune the convnet
# (load first time online and save offline).
# `pretrained` is a boolean flag; the string "True" only worked because any
# non-empty string is truthy (so "False" would have loaded the weights as well).
resnext = models.resnext50_32x4d(pretrained=True)

NUM_CLASSES = 5
num_feature = resnext.fc.in_features  # input width of the final layer

# change output to 5 classes, expl: https://discuss.pytorch.org/t/how-to-modify-the-final-fc-layer-based-on-the-torch-model/766
resnext.fc = nn.Linear(num_feature, NUM_CLASSES)

criterion = nn.CrossEntropyLoss()

# NOTE: the optimizer and scheduler below are not used by the Ray Tune run;
# train_model builds its own Adam optimizer from the sampled config.
# Observe that all parameters are being optimized
optimizer_feature = optim.SGD(resnext.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_feature, step_size=7, gamma=0.1)

In [None]:
# Check Fully Connected Layer of Efficient Net 
#efficient_net._fc

In [None]:
# Ray Tune settings
num_samples = 5      # forwarded to tune.run as num_samples
num_epochs = 5       # epochs per trial; also the scheduler's max_t
gpus_per_trial = 1
checkpoint_dir = "results"  # not referenced by the calls in this cell

# Hyperparameter search space
config = dict(
    lr=tune.loguniform(1e-4, 1e-1),
    batch_size=tune.choice([4, 8, 16, 32]),
    weight_decay=tune.loguniform(1e-8, 1e-3),
)

# ASHA scheduler configured to minimize the reported "loss" metric.
scheduler = ASHAScheduler(
    metric="loss", mode="min", max_t=num_epochs, grace_period=1, reduction_factor=2)

# Columns shown in the command-line progress table.
# (parameter_columns=["lr", "batch_size"] is left disabled.)
reporter = CLIReporter(metric_columns=["loss", "accuracy", "training_iteration"])

# Bind everything except the sampled config, then launch the search.
trainable = partial(
    train_model,
    model=resnext,
    traindata=train_data,
    testdata=val_data,
    criterion=criterion,
    num_epochs=num_epochs,
)
result = tune.run(
    trainable,
    resources_per_trial={"gpu": gpus_per_trial},
    config=config,
    num_samples=num_samples,
    scheduler=scheduler,
    progress_reporter=reporter,
)

In [None]:
# Select the trial with the lowest last-reported loss and print its
# configuration together with its final validation metrics.
best_trial = result.get_best_trial("loss", "min", "last")
final_metrics = best_trial.last_result
print("Best trial config: {}".format(best_trial.config))
print("Best trial final validation loss: {}".format(final_metrics["loss"]))
print("Best trial final validation accuracy: {}".format(final_metrics["accuracy"]))

In [None]:
# # train pre-trained model with my parameters 
# efficient_net = train_model(efficient_net, train_data, val_data, criterion, num_epochs=25)

In [None]:
# Save trained model in output/kaggle/working
path = "./resnext.pt"

# Save outside of kaggle
#path = "./train/resnext.pt"

# save uni server
#path = "../../../data2/yan/cassava/save/efficient_net_b7_finetune.pt"

# Ray Tune runs every trial in a worker process on its own copy of the model,
# so the `resnext` object in this kernel still holds the untrained weights.
# Load the weights checkpointed by the best trial before saving.
# NOTE(review): `trial.checkpoint.value` is the checkpoint directory in the Ray
# releases that provide `tune.checkpoint_dir`; confirm against the installed version.
best_trial = result.get_best_trial("loss", "min", "last")
best_checkpoint_dir = best_trial.checkpoint.value
model_state, _ = torch.load(
    os.path.join(best_checkpoint_dir, "checkpoint"), map_location="cpu")

# Weights saved from an nn.DataParallel wrapper carry a "module." prefix.
model_state = {
    (key[len("module."):] if key.startswith("module.") else key): tensor
    for key, tensor in model_state.items()
}
resnext.load_state_dict(model_state)

# Save model (parameters: weights and biases of each layer)
torch.save(resnext.state_dict(), path)