This is my first-ever image competition, and I wanted to try my hand at ResNet50. This is a very basic model, and I'm still learning how to work with image-based data. I felt I should share this with the Kaggle crowd. Any comments and suggestions are most welcome. Thanks!

To get started, I have built the model and validated it on a holdout dataset for a maximum of 10 epochs. In case you are wondering about the naming: I have a habit of calling the entire train and test datasets train_prod and test_prod, and the outputs of train_test_split() train_local and test_local.

In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import glob

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torch

from torchvision.models import resnet18, resnet50

import random
from tqdm import tqdm

from PIL import Image

from sklearn.metrics import confusion_matrix

# from efficientnet_pytorch import EfficientNet

import seaborn as sns
import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Switch between the Kaggle kernel file layout (True) and a local checkout (False).
kaggle = True

# Resolve the label file and the two image folders for the selected environment.
if not kaggle:
    data_label_path = "../data/train.csv"
    train_images_path = "../data/train_images"
    test_images_path = "../data/test_images"
else:
    data_label_path = "/kaggle/input/cassava-leaf-disease-classification/train.csv"
    train_images_path = "/kaggle/input/cassava-leaf-disease-classification/train_images/"
    test_images_path = "/kaggle/input/cassava-leaf-disease-classification/test_images/"

# Central settings dictionary read by the rest of the notebook.
config = dict(
    data_label_path=data_label_path,
    train_images_path=train_images_path,
    test_images_path=test_images_path,
    # Target size handed to the image resize step.
    resize_length=226,
    resize_breath=226,
    train_batch_size=8,
    test_batch_size=16,
    num_workers=8,
    # Use the GPU whenever torch reports one, otherwise fall back to the CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
    model_name='ResNet50',
)

In [None]:
# Read the training labels and count the distinct target classes.
train_df = pd.read_csv(config['data_label_path'])
num_targets = len(train_df.label.unique())

# Shuffle the rows for local (non-Kaggle) runs only.
# The previous condition was `~kaggle`: bitwise NOT of a bool gives -2 (for True) or
# -1 (for False), both truthy, so the shuffle ran in every environment.
# `not kaggle` is the logical negation that was intended.
if not kaggle:
    train_df = train_df.sample(len(train_df))

# Build the test frame from the file names found in the test image directory.
test_df = pd.DataFrame()
test_df['image_id'] = os.listdir(config['test_images_path'])

print(train_df.shape, test_df.shape)

In [None]:
# Notebook cell output: display the test frame (one `image_id` row per file in the test image directory).
test_df

In [None]:
# Hold out 20% of the labelled rows for local validation and renumber both parts from 0.
# NOTE(review): no random_state is passed, so the split differs from run to run.
train_local, test_local = (
    part.reset_index(drop=True)
    for part in train_test_split(train_df, test_size=0.2)
)

# Notebook cell output: shapes of the two parts.
train_local.shape, test_local.shape

In [None]:
class Dataset():
    """Map-style image dataset backed by a dataframe of file names.

    Each item is a dict with an ``'image'`` float tensor and a ``'label'`` long tensor.

    Args:
        dataframe: frame with an ``image_id`` column (file name inside ``img_path``)
            and, unless ``data_type == 'test'``, a ``label`` column.
        img_path: directory that contains the image files.
        resize_length, resize_breath: target size, passed to PIL's ``resize`` as
            ``(resize_length, resize_breath)``.
        data_type: ``'test'`` makes every item carry the placeholder label ``1``;
            any other value reads the label from the dataframe.
        do_resize: resize the image before converting it to an array.
        transpose: reorder the array axes with ``transpose(2, 0, 1)``
            (height/width/channel -> channel/height/width).
    """

    def __init__(self, dataframe, img_path, resize_length, resize_breath, data_type, do_resize, transpose):
        self.dataframe = dataframe
        self.img_path = img_path
        self.length = resize_length
        self.breath = resize_breath
        self.do_resize = do_resize
        self.data_type = data_type
        self.transpose = transpose

    def __len__(self):
        """Return the number of rows (= images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load, optionally resize/transpose and tensorise the image at position ``idx``."""
        # Positional lookup: a sampler hands out positions 0..len-1, which only coincide
        # with index labels when the frame has a fresh RangeIndex.
        row = self.dataframe.iloc[idx]
        image_path = os.path.join(self.img_path, row['image_id'])

        # The context manager closes the underlying file once the pixels are in memory.
        with Image.open(image_path) as handle:
            # Force three channels so the (2, 0, 1) transpose below is always valid,
            # even for grayscale or RGBA files.
            image = handle.convert('RGB')

            if self.do_resize:
                image = image.resize((self.length, self.breath))

            image = np.array(image)

        if self.transpose:
            image = image.transpose(2, 0, 1)

        if self.data_type == 'test':
            # Test rows have no ground truth; a constant placeholder keeps the item layout uniform.
            label = 1
        else:
            label = row['label']

        # NOTE(review): pixel values are passed through unscaled (0-255); no normalisation happens here.
        return {'image': torch.tensor(image, dtype=torch.float),
                'label': torch.tensor(label, dtype=torch.long)}

In [None]:
# Settings shared by all four datasets: same target size, always resized, always transposed.
_dataset_kwargs = dict(resize_length=config['resize_length'],
                       resize_breath=config['resize_breath'],
                       do_resize=True,
                       transpose=True)

# Local experiment: training part and held-out part of the labelled data.
# Both read labels, so both use data_type='train'.
train_local_images = Dataset(dataframe=train_local,
                             img_path=config['train_images_path'],
                             data_type='train',
                             **_dataset_kwargs)
test_local_images = Dataset(dataframe=test_local,
                            img_path=config['train_images_path'],
                            data_type='train',
                            **_dataset_kwargs)

# "Prod" run: every labelled image for training, the unlabelled test folder for prediction.
train_prod_images = Dataset(dataframe=train_df,
                            img_path=config['train_images_path'],
                            data_type='train',
                            **_dataset_kwargs)
test_prod_images = Dataset(dataframe=test_df,
                           img_path=config['test_images_path'],
                           data_type='test',
                           **_dataset_kwargs)

# The helper dict is only needed while constructing the datasets.
del _dataset_kwargs

# plt.imshow(train_local_images[10]['image'])

In [None]:
# Training loaders draw the samples in random order (shuffle=True); without it every
# epoch would visit the images in exactly the stored dataframe order.
train_local_images_dataloader = DataLoader(train_local_images,
                                           batch_size=config['train_batch_size'],
                                           shuffle=True,
                                           num_workers=config['num_workers'])
train_prod_images_dataloader = DataLoader(train_prod_images,
                                          batch_size=config['train_batch_size'],
                                          shuffle=True,
                                          num_workers=config['num_workers'])

# Evaluation / prediction loaders keep the dataset order. For the test loader this is
# required: the predictions are later attached to test_df row by row.
test_local_images_dataloader = DataLoader(test_local_images,
                                          batch_size=config['test_batch_size'],
                                          num_workers=config['num_workers'])
test_prod_images_dataloader = DataLoader(test_prod_images,
                                         batch_size=config['test_batch_size'],
                                         num_workers=config['num_workers'])

In [None]:
class ResNet50_classifier_for_cassava(nn.Module):
    """ResNet50 whose final fully connected layer is swapped for a ``num_labels``-way linear head."""

    def __init__(self, num_labels):
        """Build the backbone, restore its weights from the local checkpoint and attach the new head.

        Args:
            num_labels: number of output units of the replacement ``fc`` layer.
        """
        super().__init__()
        self.num_labels = num_labels

        # ResNet50 backbone: create the bare architecture, then load the weights
        # from the checkpoint file attached to the notebook.
        backbone = resnet50()
        backbone.load_state_dict(torch.load("../input/resetnet50/resnet50-19c8e357.pth"))
        self.model = backbone

        # Fine-tune the whole network: every backbone parameter stays trainable.
        for weight in backbone.parameters():
            weight.requires_grad = True

        # Replace the classification layer, keeping its input width.
        self.last_layer_size = backbone.fc.in_features
        backbone.fc = nn.Linear(self.last_layer_size, self.num_labels)

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.model(x)
        

In [None]:
# Loss shared by the training and evaluation loops (cross-entropy with default arguments).
criterion = nn.CrossEntropyLoss()

def setting_seed(seed_no):
    """Seed Python's ``random``, NumPy and torch (CPU and all CUDA devices) with ``seed_no``."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_generator in seeders:
        seed_generator(seed_no)

    
def model_saving(model):
    """Serialise the whole ``model`` object to ``best_<model_name>.bin``.

    The file goes to the working directory on Kaggle and to ``../saved_models/``
    otherwise; ``<model_name>`` comes from ``config['model_name']``.
    """
    prefix = './best_' if kaggle else '../saved_models/best_'
    model_name = prefix + config['model_name'] + ".bin"
    torch.save(model, model_name)

In [None]:
def train_fn(data_loader, model, optimizer, params):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        data_loader: iterable of batches; each batch is a dict with ``'image'`` and ``'label'``.
        model: network to optimise; switched to training mode here.
        optimizer: optimizer stepped once per batch.
        params: unused; kept so existing callers keep working.

    Returns:
        Scalar tensor (detached from the autograd graph) holding the sum of the
        batch losses divided by ``len(data_loader)``.

    Seeding is left to the caller (``training_engine`` seeds once before building the
    model). Reseeding at the start of every epoch would replay the identical random
    stream each time, e.g. the same shuffle order.
    """
    model.train()

    train_loss = 0
    for batch in tqdm(data_loader, total=len(data_loader)):
        image = batch['image'].to(config['device'], dtype=torch.float)
        target = batch['label'].to(config['device'], dtype=torch.long)

        # Start every step from clean gradients, whatever state the caller left behind.
        optimizer.zero_grad()

        prediction = model(image)
        step_loss = criterion(prediction, target)

        step_loss.backward()
        optimizer.step()

        # Accumulate a detached copy: adding the graph-attached loss would chain every
        # step's autograd history onto the running total and keep it alive.
        train_loss += step_loss.detach()

    avg_train_loss = train_loss / len(data_loader)

    return avg_train_loss
    

In [None]:
def eval_fn(data_loader, model):
    """Evaluate ``model`` on ``data_loader`` and print the confusion matrix.

    Args:
        data_loader: iterable of batches; each batch is a dict with ``'image'`` and ``'label'``.
        model: network to evaluate; switched to eval mode here.

    Returns:
        Tuple ``(avg_eval_loss, actual_class, predicted_prob, predicted_class)``:
        - ``avg_eval_loss``: sum of batch losses divided by ``len(data_loader)`` (tensor).
        - ``actual_class``: NumPy array of the target labels.
        - ``predicted_prob``: tensor of the raw model outputs on ``config['device']``;
          despite the name, no softmax is applied in this function.
        - ``predicted_class``: NumPy array with the argmax over axis 1 of those outputs.
    """
    model.eval()
    eval_loss = 0

    # Collect per-batch tensors and concatenate once after the loop; calling torch.cat
    # on the growing result every batch re-copies everything seen so far.
    # Each list starts with an empty tensor of the intended dtype so the concatenated
    # result has the same dtype/device as before.
    target_batches = [torch.tensor([]).to(config['device'], dtype=torch.long)]
    output_batches = [torch.tensor([]).to(config['device'], dtype=torch.float)]

    with torch.no_grad():
        for batch in tqdm(data_loader, total=len(data_loader)):
            image = batch['image'].to(config['device'], dtype=torch.float)
            target = batch['label'].to(config['device'], dtype=torch.long)

            prediction = model(image)

            eval_loss += criterion(prediction, target)

            target_batches.append(target)
            output_batches.append(prediction)

    actual_output = torch.cat(target_batches)
    predicted_prob = torch.cat(output_batches)

    actual_class = np.array(actual_output.detach().cpu())
    predicted_class = np.argmax(np.array(predicted_prob.detach().cpu()), axis=1)

    conf_mat = confusion_matrix(actual_class, predicted_class)
    print(conf_mat)

    avg_eval_loss = eval_loss / len(data_loader)

    return avg_eval_loss, actual_class, predicted_prob, predicted_class


In [None]:
def training_engine(EPOCHS, train_data, valid_data, patience):
    """Train a fresh classifier with accuracy-based early stopping.

    Reads the module-level names ``seed``, ``num_targets`` and ``config``.

    Args:
        EPOCHS: maximum number of epochs.
        train_data: loader passed to ``train_fn`` every epoch.
        valid_data: loader passed to ``eval_fn`` every epoch.
        patience: number of consecutive epochs without an accuracy improvement
            after which training stops.

    Returns:
        Tuple ``(model, actual_class, predicted_prob, predicted_class)``. ``model`` is the
        network as it stands after the last epoch that ran (the best-scoring one is
        written out through ``model_saving``). The other three come from the last
        evaluation and are ``None`` when no epoch ran.
    """
    setting_seed(seed_no = seed)
    model = ResNet50_classifier_for_cassava(num_labels=num_targets)
    model = nn.DataParallel(model)
    model.to(config['device'])

    # model.parameters() is a one-shot generator: materialise it so the optimizer and
    # the `params` argument of train_fn both receive the full parameter list instead
    # of train_fn getting an already exhausted iterator.
    optimizer_grouped_parameters = list(model.parameters())
    optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr = 0.0005)

    best_accuracy = 0
    counter = 0
    # Defined up front so the return statement is valid even when EPOCHS is 0.
    actual_class = predicted_prob = predicted_class = None

    for epoch in range(EPOCHS):
        # Training
        avg_train_loss = train_fn(data_loader = train_data,
                                  model = model,
                                  optimizer = optimizer,
                                  params = optimizer_grouped_parameters)

        # Evaluation
        avg_eval_loss, actual_class, predicted_prob, predicted_class = eval_fn(data_loader = valid_data,
                                                                               model = model)

        acc = accuracy_score(actual_class, predicted_class)

        print(f"Epoch {epoch}/{EPOCHS} train_loss: {avg_train_loss}, eval_loss {avg_eval_loss}, Accuracy: {acc}")

        if acc > best_accuracy:
            # New best: remember it, reset the patience counter and checkpoint the model.
            best_accuracy = acc
            counter = 0

            print('Saving the model')
            model_saving(model)

        else:
            counter += 1
            print(f"Accuracy did not improve from the best {best_accuracy}, patience is {counter}/{patience}")

            # `>=` rather than `==` so that a patience of 0 (or less) still stops training.
            if counter >= patience:
                print(f"Maximum patience level {patience} reached so exiting the training")
                break

    return model, actual_class, predicted_prob, predicted_class


In [None]:
def prediction_fn(data_loader, model):
    """Return the predicted class index for every sample served by ``data_loader``.

    Args:
        data_loader: iterable of batches; each batch is a dict with an ``'image'`` entry.
        model: network used for inference; switched to eval mode here.

    Returns:
        NumPy integer array with the argmax over axis 1 of the model outputs, in
        loader order. An empty loader yields an empty array.
    """
    model.eval()

    output_batches = []

    with torch.no_grad():
        for batch in tqdm(data_loader, total=len(data_loader)):
            image = batch['image'].to(config['device'], dtype=torch.float)
            output_batches.append(model(image))

    if not output_batches:
        # Nothing to predict; previously this case hit an unbound local at the return.
        return np.array([], dtype=np.int64)

    # Concatenate and take the argmax once, after the loop, instead of copying the whole
    # accumulated output to the host on every batch.
    predicted_output = torch.cat(output_batches)
    predicted_class = np.argmax(np.array(predicted_output.detach().cpu()), axis=1)

    return predicted_class


In [None]:
# # Training on the Local dataset
# seed = 50
# model, actual, predicted_prob, predicted_class = training_engine(EPOCHS=50, 
#                                                                  train_data=train_local_images_dataloader, 
#                                                                  valid_data=test_local_images_dataloader, 
#                                                                  patience=5)

In [None]:
# Training on the entire dataset
# NOTE(review): train_prod_images_dataloader serves every row of train_df, while
# test_local_images_dataloader serves test_local, which was split off from that same
# train_df. The validation rows are therefore also trained on, so the accuracy printed
# per epoch (and the "best" checkpoint picked from it) is optimistic.

# Seed read as a global by training_engine().
seed = 50
model, actual, predicted_prob, predicted_class = training_engine(EPOCHS=10, 
                                                                 train_data=train_prod_images_dataloader, 
                                                                 valid_data=test_local_images_dataloader, 
                                                                 patience=5)

In [None]:
# Reload the checkpoint written during training; the location depends on the environment.
model_name = ('./best_' if kaggle else '../saved_models/best_') + config['model_name'] + ".bin"
model = torch.load(model_name)

In [None]:
# Predict the test images and write the submission file (Kaggle runs only).

if kaggle:
    predicted_output = prediction_fn(model=model,
                                     data_loader=test_prod_images_dataloader)

    # New frame: the test image ids plus the predicted class for each row.
    submission = test_df.assign(label=predicted_output)
    submission.to_csv('./submission.csv', index=False)