# Herbarium 2021 - Half-Earth Challenge - FGVC8 - ResNet

**Author:** Ricardo B. Sousa (ORCID: [0000-0003-4537-5095](https://orcid.org/0000-0003-4537-5095))

**Affiliation:** Faculty of Engineering of the University of Porto, INESC TEC - Instituto de Engenharia de Sistemas e Computadores, Tecnologia e Ciência

**Scope of this work:** Project of the curricular unit Computational Vision of PDEEC@FEUP (Doctoral Program in Electrical and Computer Engineering)

## Setup

### Load libraries

In [None]:
# Math and Image
import numpy as np               # Math
import matplotlib.pyplot as plt  # Plot
import cv2                       # OpenCV
import PIL                       # Data Augmentation
import albumentations            # Data Augmentation
from albumentations.pytorch import ToTensorV2
# Operating System
import os
import time
import copy
# Utilities
import pandas as pd  # Handling CSV files
import random
import json
import tqdm          # Testing
import sklearn       # label encoding and metrics
from sklearn import preprocessing
# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
# TensorFlow
import tensorflow as tf

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list
# all files under the input directory

# You can write up to 20GB to the current directory (/kaggle/working/) that gets
# preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved
# outside of the current session

print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

### PC Configuration

In [None]:
# CPU Details - Number & Model
!lscpu | grep "CPU(s):"
!lscpu | grep Hz
# Total Memory
!cat /proc/meminfo | grep MemTotal
# CUDA Check
# Report whether a CUDA device is visible to PyTorch and, if so, its name
print("Is CUDA Available?",
      torch.cuda.is_available())
if torch.cuda.is_available():
    print("Current CUDA device:",
        torch.cuda.get_device_name(0))

# Device used by the rest of the notebook: GPU when available, otherwise CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ",DEVICE)

### Random Seed

**Note:** change the type of the next cell from Code to Markdown when using data augmentation (if you train 1 epoch per session with seed fixed, the same images would be augmented in all epochs; the purpose is augmenting online all the images if possible, and not only a subset and always the same subset).

In [None]:
def seed_fix(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # processes); the hash seed of the running interpreter is already fixed.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every GPU, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms between runs,
    # which defeats `deterministic = True`; switch it off as well.
    torch.backends.cudnn.benchmark = False

seed_fix()
print("Seed fixed")

### Configuration

In [None]:
# Dataset locations (Kaggle layout); directory constants keep a trailing "/"
# because other cells build paths by plain string concatenation.
ROOT_DATA   = "/kaggle/input/herbarium-2021-fgvc8/"
DATA_TRAIN  = ROOT_DATA + "train/"
DATA_TEST   = ROOT_DATA + "test/"
ROOT_OUTPUT = "/kaggle/working/"
META        = "metadata.json"
# Training / evaluation hyper-parameters
BATCH_SIZE  = 128    # number of training examples utilized in one iteration
BATCH_EVAL  = 128    # number of test examples per inference batch
SHUFFLE     = True
EPOCHS      = 1
LEARN_RATE  = 4e-4
IMG_HEIGHT  = 224
IMG_WIDTH   = 224
NUM_CLASSES = None   # define below
NUM_WORKERS = 4
PRE_TRAINED = False

# Checkpoint written at the end of every training epoch
PATH_SAVE_MODEL = ROOT_OUTPUT + "ResNet50_da_run-09.pth"

ESTIMATED_MAX_TRAINING_TIME = 480 # minutes (8h * 60 = 480min, leaving 1h to the test time)

print(ROOT_DATA)

## Create dataset

### Generic information

In [None]:
def _print_metadata_summary(meta, split, prefix=""):
    """Print the image count and, per top-level key, its size and first entry.

    Args:
        meta: Dictionary decoded from a ``metadata.json`` file.
        split: Name of the split used in the header line ("training"/"test").
        prefix: Text emitted before the header (e.g. a blank line).
    """
    print("{}Number of images ({} dataset): ".format(prefix, split),
          len(meta["images"]),)
    for key, value in meta.items():
        entries = list(value)
        print("  - sample and number of elements in category {}: ".format(key),
              len(entries),)
        # Guard against empty sections instead of raising IndexError
        print("\t[0] ",
              entries[0] if entries else None, end="\n")


# Training dataset
with open(os.path.join(DATA_TRAIN,META),"r",encoding="ISO-8859-1") as file:
    meta_train = json.load(file)
_print_metadata_summary(meta_train, "training")

NUM_CLASSES   = len(meta_train["categories"])
NUM_IMG_TRAIN = len(meta_train["annotations"])

# Test dataset (its metadata carries no annotations)
with open(os.path.join(DATA_TEST,META),"r",encoding="ISO-8859-1") as file:
    meta_test = json.load(file)
_print_metadata_summary(meta_test, "test", prefix="\n")

NUM_IMG_TEST  = len(meta_test["images"])

# Print variables
print("\n\n"
      "Number of images for training: ",NUM_IMG_TRAIN)
print("Number of images for testing : ",NUM_IMG_TEST)
print("Number of classes            : ",NUM_CLASSES)

### Process training and evaluation metadata

Merge the training images and annotations into a single dataframe. The database-style join is performed on the image identifier: "id" in the images dataframe and "image_id" in the annotations dataframe.

The problem with the test data is that its `.json` does not provide labels, so it is not possible to check locally whether the model classifies the test images correctly.

In [None]:
# Process metadata json for training images into a DataFrame
train_img = pd.DataFrame(meta_train["images"])
# Join on the image identifier: discard the annotation's own "id" and expose
# "image_id" under the name "id", so the merge does not rely on annotation ids
# happening to coincide with image ids.
train_ann = (
    pd.DataFrame(meta_train["annotations"])
    .drop(columns="id", errors="ignore")
    .rename(columns={"image_id": "id"})
)
train_df  = train_img.merge(train_ann,on="id") # Performs a database-style join

# Check number of classes
print("Number of classes (expected): ",NUM_CLASSES)
print("Number of classes (computed): ",
      train_df["category_id"].max() - train_df["category_id"].min()+1)
print("\nShape training dataframe    :",train_df.shape)

# Process metadata json for test images into a DataFrame
test_df = pd.DataFrame(meta_test["images"])

## Prepare Submission

In [None]:
# Template submission shipped with the competition data
sample_submission = pd.read_csv(os.path.join(ROOT_DATA, "sample_submission.csv"))

## Label Encoder

In [None]:
# Fit the label encoder instance
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(train_df["category_id"])

# Transform labels to normalized encoding
train_df["category_id_le"] = label_encoder.transform(train_df["category_id"])
# Encoded label -> original category id. `classes_` holds the original ids
# indexed by their encoded value, so there is no need to materialise and sort
# one (encoded, original) pair per training row.
class_map = dict(enumerate(label_encoder.classes_.tolist()))

print("Labels converted to normalized encoding")

### Dataloaders

In [None]:
class TrainDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, label)`` pairs for training.

    Args:
        df: DataFrame with a ``file_name`` column holding paths relative to
            ``DATA_TRAIN``.
        labels: pandas object (exposing ``.values``) with one label per row
            of ``df``.
        transform: Optional callable invoked as ``transform(image=img)`` and
            returning a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, labels, transform=None):
        self.df = df
        self.labels = labels
        self.transform = transform

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and return it with its label.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.df["file_name"].values[idx]
        file_path = DATA_TRAIN + file_name
        img = cv2.imread(file_path)
        # cv2.imread signals failure by returning None rather than raising
        if img is None:
            raise FileNotFoundError(
                "Could not read training image: {}".format(file_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels.values[idx]

        if self.transform:
            img = self.transform(image=img)["image"]

        return img, label

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Dataset yielding unlabeled images for inference.

    Args:
        df: DataFrame with a ``file_name`` column holding paths relative to
            ``DATA_TEST``.
        transform: Optional callable invoked as ``transform(image=img)`` and
            returning a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and return it.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        file_name = self.df["file_name"].values[idx]
        file_path = DATA_TEST + file_name
        img = cv2.imread(file_path)
        # cv2.imread signals failure by returning None rather than raising
        if img is None:
            raise FileNotFoundError(
                "Could not read test image: {}".format(file_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img = self.transform(image=img)["image"]

        return img

### Image Augmentation

**Note:** uncomment the `albumentations.HorizontalFlip` + `albumentations.VerticalFlip` + `albumentations.Rotate` to have data augmentation with geometric transformations.

In [None]:
def get_transforms(*, data: str):
    """Build the albumentations pipeline for the requested split.

    Both pipelines resize to ``IMG_HEIGHT`` x ``IMG_WIDTH``, normalize with the
    given per-channel mean/std and convert the image to a tensor. The
    geometric augmentations of the training pipeline are commented out.

    Args:
        data: Either ``"train"`` or ``"test"``.

    Returns:
        An ``albumentations.Compose`` instance.

    Raises:
        ValueError: If ``data`` is not one of the supported splits.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`
    if data not in ("train","test"):
        raise ValueError(
            "data must be 'train' or 'test', got {!r}".format(data))

    if data == "train":
        return albumentations.Compose([
            albumentations.Resize(IMG_HEIGHT,IMG_WIDTH),
            albumentations.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            #albumentations.HorizontalFlip(p=0.25),
            #albumentations.VerticalFlip(p=0.25),
            #albumentations.Rotate(limit=10,p=0.05),
            ToTensorV2(),
        ])

    # data == "test": deterministic preprocessing only
    return albumentations.Compose([
        albumentations.Resize(IMG_HEIGHT,IMG_WIDTH),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

# Check One Sample

**Note:** change the type of the next cell from Markdown to Code to see an example of the data augmentation techniques used in this work.

im = PIL.Image.open(DATA_TRAIN + train_df.file_name[0])  # first training image
newsize=(IMG_WIDTH,IMG_HEIGHT)  # PIL's resize expects (width, height)
im=im.resize(newsize)
im=im.transpose(PIL.Image.FLIP_LEFT_RIGHT)  # horizontal flip
im=im.transpose(PIL.Image.FLIP_TOP_BOTTOM)  # vertical flip
im=im.rotate(10)  # rotate by 10 degrees
im.save("original-image_left_2right_top2bottom_rotate-10.png")
im

## Datasets

### Training Dataset

In [None]:
# Training samples: images paired with their encoded labels
train_dataset = TrainDataset(
    df=train_df,
    labels=train_df["category_id_le"],
    transform=get_transforms(data="train"),
)
# Batches are drawn according to the notebook-level configuration
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=SHUFFLE,
    num_workers=NUM_WORKERS,
)
print("Train data loader created")

### Test Dataset

In [None]:
test_df.head(n=5)

In [None]:
# Test samples: unlabeled images only
test_dataset = TestDataset(
    df=test_df,
    transform=get_transforms(data="test"),
)
# Order is preserved (no shuffling) so predictions line up with test_df rows
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_EVAL,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
print("Test data loader created")

## Model

In [None]:
def train_model(model,dataloader,criterion,optimizer,num_epochs=1):
    """Train ``model`` and checkpoint it after every epoch.

    Training stops early, both inside an epoch and between epochs, once the
    elapsed wall-clock time in minutes exceeds ``ESTIMATED_MAX_TRAINING_TIME``.
    The state dict is saved to ``PATH_SAVE_MODEL`` at the end of each epoch,
    including an epoch cut short by the time limit.

    Args:
        model: Network to train; expected to already live on ``DEVICE``.
        dataloader: Iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute with a length.
        criterion: Loss callable ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Maximum number of epochs to run.

    Returns:
        The trained model (the same object, updated in place).
    """
    since = time.time()
    len_dataset = len(dataloader.dataset)
    # Defined up front so the final report also works when no epoch runs
    epoch_loss = float("nan")
    epoch_acc = float("nan")

    def out_of_time():
        # Elapsed whole minutes compared against the configured budget
        return (time.time() - since) // 60 > ESTIMATED_MAX_TRAINING_TIME

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()

        running_loss = 0.0
        running_corrects = 0
        # Samples actually processed: the last batch may be smaller and the
        # epoch may be interrupted by the time limit.
        samples_seen = 0

        for inputs, labels in dataloader:
            since_tmp = time.time()

            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            optimizer.zero_grad()

            # Force gradients on even if the caller is inside a no_grad block
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                loss = criterion(outputs,labels)

                loss.backward()
                optimizer.step()

            batch_len = inputs.size(0)
            loss_tmp = loss.item()
            pred_tmp = int((outputs.argmax(dim=1) == labels).sum().item())

            # criterion returns a batch mean; weight it by the batch size
            running_loss += loss_tmp * batch_len
            running_corrects += pred_tmp
            samples_seen += batch_len

            time_elapsed_tmp = time.time() - since_tmp

            print('  loss: {:.04f} corr: {:d} ({:d}/{:d}) completed in {:.0f}m {:.03f}s'.format(
                loss_tmp, pred_tmp,samples_seen,len_dataset,
                time_elapsed_tmp // 60, time_elapsed_tmp % 60))

            if out_of_time():
                break

        # Average over what was really processed, not the full dataset size
        denominator = max(samples_seen, 1)
        epoch_loss = running_loss / denominator
        epoch_acc = running_corrects / denominator

        torch.save(model.state_dict(),PATH_SAVE_MODEL)
        print(PATH_SAVE_MODEL)
        print('Loss: {:.04f} Acc: {:.04f}'.format(epoch_loss, epoch_acc))

        print()

        if out_of_time():
            break

    time_elapsed = time.time() - since
    print('Loss: {:.04f} Acc: {:.04f}'.format(epoch_loss, epoch_acc))
    print('Training complete in {:.0f}m {:.03f}s'.format(time_elapsed // 60, time_elapsed % 60))

    return model

### Construct model

In [None]:
# ResNet-50 backbone; PRE_TRAINED selects whether pretrained weights are loaded
model = torchvision.models.resnet50(pretrained=PRE_TRAINED)
# Global average pooling down to a 1x1 feature map
model.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=1)
# Replace the classification head so it outputs one logit per class
model.fc = torch.nn.Linear(
    in_features=model.fc.in_features,
    out_features=NUM_CLASSES,
)

### Use existent trained model

In [None]:
# Resume from the previous run's checkpoint. `map_location` remaps the stored
# tensors onto the current device, so a checkpoint saved from a GPU session
# can also be loaded in a CPU-only session.
model.load_state_dict(
    torch.load("../input/herbarium-2021-rbs/ResNet50_da_run-08.pth",
               map_location=DEVICE))

### Train

In [None]:
# Move the model to the selected device before the optimizer is built over
# its parameters
model.to(DEVICE)

# Adam over all model parameters with the configured learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=LEARN_RATE, amsgrad=False)
# NOTE(review): `scheduler` is created here but, in the visible cells, it is
# neither passed to train_model nor stepped, so it has no effect on the
# learning rate — confirm whether that is intended.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 'min', factor=0.75, patience=5, verbose=True, eps=1e-6)

# Multi-class classification loss applied to the model outputs
criterion = torch.nn.CrossEntropyLoss()

In [None]:
model = train_model(model,train_loader,criterion,optimizer,EPOCHS)

## Inference

In [None]:
# Switch to evaluation mode and make sure the model is on the target device
model.eval()
model.to(DEVICE)

# One encoded class index per test image, in test_loader order
predictions = np.zeros(len(test_dataset), dtype=np.int64)

# Write position in `predictions`; advanced by the real batch length so the
# bookkeeping does not depend on the configured batch size.
offset = 0
with torch.no_grad():
    # Wrap the loader itself so tqdm knows the number of batches
    for images in tqdm.tqdm(test_loader):
        images = images.to(DEVICE)
        batch_preds = model(images).argmax(dim=1).cpu().numpy()
        predictions[offset:offset + len(batch_preds)] = batch_preds
        offset += len(batch_preds)

## Submit

In [None]:
# Attach the encoded predictions to the test metadata
test_df["preds"] = predictions.astype(int)

# Align predictions with the sample submission through the image id,
# discarding the placeholder "Predicted" column
submission = sample_submission.merge(
    test_df.rename(columns={"id": "Id"})[["Id", "preds"]],
    on="Id",
)
submission = submission.drop(columns="Predicted")

# Translate encoded labels back to the original category ids
submission = submission.assign(
    Predicted=submission["preds"].map(class_map)
).drop(columns="preds")

submission.to_csv("submission.csv", index=False)
submission.head()