In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# unzip images
# !unzip /content/drive/MyDrive/data/ClassifyLeaves/classify-leaves.zip -d /content/drive/MyDrive/data/ClassifyLeaves

In [2]:
import os
path = '/content/drive/MyDrive'
os.chdir(path)

!source venv_d2l/bin/activate

In [None]:
!pip install git+https://github.com/d2l-ai/d2l-en.git

In [4]:
# two additional libraries that you will need to install for this notebook if you run it on kaggle or colab
!pip install timm
!pip install madgrad

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->tim

In [3]:
path = '/content/drive/MyDrive/d2l-zh'
os.chdir(path)

In [4]:
# import torch
# import torch.nn as nn
# import pandas as pd
# import numpy as np
# from torch.utils.data import Dataset, DataLoader
# from torchvision import transforms
# from PIL import Image
# import os
# import matplotlib.pyplot as plt
# import torchvision.models as models
# # This is for the progress bar.
# from tqdm import tqdm
# import seaborn as sns

# ====================================================
# Library
# ====================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torchvision import datasets, models, transforms
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import cv2
import timm

import time
import os
import copy
from tqdm import tqdm
import random
from madgrad import MADGRAD
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# ====================================================
# Data Loading
# ====================================================
train = pd.read_csv('/content/drive/MyDrive/data/ClassifyLeaves/train.csv')

def get_image_file_path(image_path):
    INPUT_DIR = '/content/drive/MyDrive/data/ClassifyLeaves/images'
    return INPUT_DIR+image_path

# ====================================================
# Leave labels mapping
# ====================================================
species_name_list = sorted(set(train['label']))
species_to_num = dict(zip(species_name_list, range(len(species_name_list))))
num_to_species = {value : key for key, value in species_to_num.items()}
num_class = len(species_name_list)

In [8]:
# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    print_freq=100
    num_workers=4
    model_name='inception_resnet_v2'
    size=299
    epochs=50 # not to exceed 9h
    batch_size=32
    learning_rate=1e-4
    weight_decay=1e-9
    scheduler='ReduceLROnPlateau' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    factor=0.2 # ReduceLROnPlateau
    patience=4 # ReduceLROnPlateau
    eps=1e-6 # ReduceLROnPlateau
    T_max=20 # CosineAnnealingLR
    #T_0=4 # CosineAnnealingWarmRestarts
    min_lr=1e-6 # ['CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    seed=42 #[42,2021]
    n_fold=5
    train=True

In [9]:
# ====================================================
# Utils
# ====================================================
def init_logger(log_file='./train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()

def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

In [10]:
# ====================================================
# CV split
# ====================================================
folds = train.copy()
Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['label'])):
    folds.loc[val_index, 'fold'] = int(n)
folds['fold'] = folds['fold'].astype(int)
print(folds.groupby(['fold']).size())

fold
0    3671
1    3671
2    3671
3    3670
4    3670
dtype: int64


In [11]:
# ====================================================
# Dataset
# ====================================================
# note: here I made a small mistake. I did not notice the cv2.imread generate BGR image until the last week when I went through other's codes, so most of my models were trained using BGR image
# I think using RGB image may make the training converge faster when we start from imagenet pre-trained model
class TrainDataset(Dataset):
    def __init__(self, df, species_to_num, transform=None):
        super().__init__()
        self.df = df
        self.species_to_num = species_to_num
        self.file_paths = get_image_file_path(df['image'].values)
        self.labels = df['label'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        image = cv2.imread(file_path)
#         image = Image.open(file_path)
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        label = self.species_to_num[label]
        return image, label


class TestDataset(Dataset):
    def __init__(self, df, transform=None):
        super().__init__()
        self.df = df
        self.file_paths = get_image_file_path(df['image'].values)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        image = cv2.imread(file_path)
#         image = Image.open(file_path)
        if self.transform:
            image = self.transform(image)
        return image

In [12]:
# ====================================================
# Data transforms
# ====================================================
# I applied slightly different transforms for different groups of models
# resnet50d, efficientnet_b3: flip only
# resnext50_32x4d, resnest50d tf_efficientnet_b4_ns, resnest200e, mixnet_s: add ColorJitter
# inception_resnet_v2, vit_base_patch16_224, tf_efficientnet_b3_ns: use RandomResizedCrop instead of Resize, and use [0.5, 0.5, 0.5] as mean and std
data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0),
        transforms.RandomResizedCrop([CFG.size, CFG.size]),
#         transforms.Resize([CFG.size, CFG.size]),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
#             mean=[0.485, 0.456, 0.406],
#             std=[0.229, 0.224, 0.225],
        ),
        #transforms.RandomErasing(),
    ]),
    'valid': transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize([CFG.size, CFG.size]),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
#             mean=[0.485, 0.456, 0.406],
#             std=[0.229, 0.224, 0.225],
        ),
    ]),
}

In [13]:
# ====================================================
# scheduler
# ====================================================
def get_scheduler(optimizer):
    if CFG.scheduler=='ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
    elif CFG.scheduler=='CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
    elif CFG.scheduler=='CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
    return scheduler

In [14]:
# ====================================================
# training
# ====================================================
def train_model(fold):
    since = time.time()
    model_path = './models/' + CFG.model_name + '_fold' + str(fold) + '_best.pth'
    LOGGER.info(f"============================== fold: {fold} result ==============================")
    # ====================================================
    # Data Loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds['label'].values
    train_dataset = TrainDataset(train_folds, species_to_num, transform=data_transforms['train'])
    valid_dataset = TrainDataset(valid_folds, species_to_num, transform=data_transforms['valid'])

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size,
                              shuffle=True, num_workers=CFG.num_workers,
                              pin_memory=True, drop_last=True,)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size,
                              shuffle=False, num_workers=CFG.num_workers,
                              pin_memory=True, drop_last=False)

    # ====================================================
    # model, optimizer, scheduler & loss function
    # ====================================================
    model = timm.create_model(CFG.model_name, pretrained=True, num_classes=num_class)
    # use the following model function if you want to fine-tune the last layer only, which is not what I did here. But I explored it during the competition.
    #model = leave_classifier(CFG.model_name, num_class)
    model = model.to(device)

    #optimizer = Adam(model.parameters(), lr=CFG.learning_rate, weight_decay=CFG.weight_decay, amsgrad=False)
    optimizer = MADGRAD(model.parameters(), lr=CFG.learning_rate, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)

    criterion = nn.CrossEntropyLoss()
    # ====================================================
    # loop
    # ====================================================

    best_acc = 0.95 # do not save the model if the acc is less than 0.95

    for epoch in range(CFG.epochs):

        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()
        # These are used to record information in training.
        train_loss = []
        train_accs = []

        global_step = 0
        # Iterate the training set by batches.
        for step, (imgs, labels) in enumerate(train_loader):
            # A batch consists of image data and corresponding labels.
            imgs = imgs.to(device)
            labels = labels.to(device)
            # Forward the data. (Make sure data and model are on the same device.)
            logits = model(imgs)
            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels)

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()
            # Compute the gradients for parameters.
            loss.backward()
            # Update the parameters with computed gradients.
            optimizer.step()

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            # Record the loss and accuracy.
            train_loss.append(loss.item())
            train_accs.append(acc)

        # The average loss and accuracy of the training set is the average of the recorded values.
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)


        # ---------- Validation ----------
        # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
        model.eval()
        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []

        # Iterate the validation set by batches.
        for step, (imgs, labels) in enumerate(valid_loader):
            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(imgs.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))

            # Compute the accuracy for current batch.
            acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

            # Record the loss and accuracy.
            valid_loss.append(loss.item())
            valid_accs.append(acc)

        # The average loss and accuracy for entire validation set is the average of the recorded values.
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)


        # learning rate update
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(valid_acc)
        else:
            scheduler.step()

        elapsed = time.time() - since

        # Print the information.
        LOGGER.info(f'Epoch {epoch+1}/{CFG.epochs}: train_loss = {train_loss:.4f}, valid_loss = {valid_loss:.4f}, train_acc = {train_acc:.4f}, valid_acc = {valid_acc:.4f}, time: {elapsed:.0f}s')
        #print(f'learning_rate = {scheduler.optimizer.param_groups[0]['lr']}')
        # if the model improves, save a checkpoint at this epoch

        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
            LOGGER.info(f'Save Best Score: {best_acc:.4f} Model to {model_path}')
            #print('saving model with acc {:.3f}'.format(best_acc))

In [15]:
# once you fix the random seeds, you can train different folds and different models on different machines, e.g., kaggle, colab, etc.
# set appropriate epochs and the number of folds to remote training.
# the models I used in this competition usually takes 1-4 min for each epoch on V100, and the time cost may double up on other GPUs like P100 and T4
# train_model(2)

In [16]:
# Model Selection
# Here I used out-of-fold predictions to develop ensemble models
def cv_prob(fold):
    test_dataset = TestDataset(folds[folds['fold']==fold], transform=data_transforms['valid'])
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    model_path = CFG.model_dir + CFG.model_name + '_fold' + str(fold) + '_best.pth'
    model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    prob_list = []

    # Iterate the testing set by batches.
    for batch in tqdm(test_loader):
        imgs = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
            prob_list.append(logits.softmax(1))
    probs_np = torch.cat(prob_list, axis=0).to('cpu').numpy()
    return probs_np

In [17]:
for fold in range(CFG.n_fold):
    probs_np = cv_prob(fold)
    folds.loc[(folds['fold']==fold),CFG.model_name] = np.argmax(probs_np,axis=1)
    if(fold==0):
        probs_np_copy = probs_np
    else:
        probs_np_copy = np.concatenate((probs_np_copy, probs_np), axis=0)
    print(probs_np_copy.shape)



AttributeError: type object 'CFG' has no attribute 'model_dir'

In [18]:
# after I collected all fine-tuned models, I created a 3D array probs_3D = np.zeros([train.shape[0],num_class,num_models]) to save all softmax outputs for all the models
# probs_3D = np.zeros([train.shape[0],num_class,num_models])
# probs_3D[:,:,2] = probs_np_copy

In [19]:
# convert the species name to the index for easier comparison
folds['label_num'] = folds['label'].map(species_to_num)

In [None]:
# Stacked mean combinations
from itertools import combinations, chain
combined = []
num_model = probs_3D.shape[2]
for i in range(num_model):
    combined.append(list(combinations(range(num_model), i+1)))
# sort the folds to match the index used in probs_3D, the softmax output
fold_sorted = folds.rename_axis('MyIdx').sort_values(by = ['fold', 'MyIdx'], ascending = [True, True])

comb_results = dict()
with tqdm(total=len(list(chain(*combined)))) as process_bar:
    for c in list(chain(*combined)):
        # the result indicates how many out-of-fold predictions are incorrect
        comb_results[c] = (fold_sorted['label_num']!=np.argmax(probs_3D[:,:,c].sum(2),axis=1)).sum()

In [None]:
{k: comb_results[k] for k in sorted(comb_results, key=comb_results.get, reverse=False)[0:20]}

In [None]:
# Weighted average
num_model = probs_3D.shape[2]
weights = np.array([1.0/n_models for _ in range(num_model)])
bounds = [(0.0, 1.0) for _ in range(num_model)]

In [None]:
def loss_func(w):
    # use 1 - accuracy as the loss function to find weights
    w= np.ceil(np.array(w)*20) # this operation is to lower the resolution of the weights
    return (fold_sorted['label_num']!=np.argmax(np.matmul(probs_3D,w).reshape(-1, num_class),axis=1)).sum()


In [None]:
from scipy.optimize import differential_evolution
sol = differential_evolution(loss_func, bounds, maxiter=20, tol=1e-4, disp=True)
# sol.x is the final weight vector
# In fact, I did not use weighted average for my submission. I used it to check the importance of each model and confirmed that stacked mean method is good enough

In [None]:
# Inference
test = pd.read_csv('../input/test.csv')

In [None]:
test_dataset = TestDataset(test, transform=data_transforms['valid'])
test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                         num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

In [None]:
def test_prob():
    model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)
    model = model.to(device)
    avg_preds = []
    for fold in range(CFG.n_fold):
        model_path = CFG.model_dir + CFG.model_name + '_fold' + str(fold) + '_best.pth'
        model.load_state_dict(torch.load(model_path))
        model.eval()
        prob_list = []
        for batch in tqdm(test_loader):
            imgs = batch
            with torch.no_grad():
                logits = model(imgs.to(device))
            prob_list.append(logits.softmax(1)) #(batch_size x num_class)
        probs_np = torch.cat(prob_list, axis=0).to('cpu').numpy()
        avg_preds.append(probs_np) #(test_size x num_class)
    probs = np.mean(avg_preds, axis=0) # average of 5-fold prediction
    return probs

In [None]:
# I implement the method with TTA, but I did not submit any predictions with TTA
# def test_prob_with_TTA():
#     model = timm.create_model(CFG.model_name, pretrained=False, num_classes=num_class)
#     model = model.to(device)
#     avg_preds = []
#     for fold in range(CFG.n_fold):
#         model_path = CFG.model_dir + CFG.model_name + '_fold' + str(fold) + '_best.pth'
#         model.load_state_dict(torch.load(model_path))
#         model.eval()
#         prob_list = []
#         for batch in tqdm(test_loader):
#             x = batch.to(device)
#             with torch.no_grad():
#                 x = torch.stack([x, x.flip(-1), x.flip(-2), x.flip(-1,-2),
#                                  x.transpose(-1,-2), x.transpose(-1,-2).flip(-1),
#                                  x.transpose(-1,-2).flip(-2), x.transpose(-1,-2).flip(-1,-2)],0)
#                 x = x.view(-1, 3, CFG.size, CFG.size)
#                 logits = model(x)
#                 logits = logits.view(8, CFG.batch_size, -1).mean(0)
#             prob_list.append(logits.softmax(1)) #(batch_size x num_class)
#         probs_np = torch.cat(prob_list, axis=0).to('cpu').numpy()
#         avg_preds.append(probs_np) #(test_size x num_class)
#     probs = np.mean(avg_preds, axis=0) # average of 5-fold prediction
#     return probs

In [None]:
# just like what I did in model selection section, I create a 3D array to save all softmax outputs
probs_3D_pred = np.zeros([8800,176,11])

In [None]:
probs_np_copy2 = test_prob()
probs_np_copy2.shape

In [None]:
# once the softmax outputs are saved, I do not have to run the prediction again for this model, although it only takes a few minites.
probs_3D_pred[:,:,0] = probs_np_copy2
# you only need to do ensemble after doing this for all 11 models
np.save('prediction_raw_data_11models.npy',probs_3D_pred)

In [None]:
# Uploading the models is too time-consuming. Instead, I uploaded the softmax output.

import numpy as np
import pandas as pd
probs_3D_ori_11 = np.load('../input/model-predictions/prediction_raw_data_11models.npy')
test = pd.read_csv('../input/classify-leaves/test.csv')
train = pd.read_csv('../input/classify-leaves/train.csv')

# ====================================================
# Leave labels mapping
# ====================================================
species_name_list = sorted(set(train['label']))
species_to_num = dict(zip(species_name_list, range(len(species_name_list))))
num_to_species = {value : key for key, value in species_to_num.items()}
num_class = len(species_name_list)

In [None]:
s6_pred = pd.Series(np.argmax(probs_3D_ori_11[:,:,(0, 1, 3, 4, 6, 7, 9)].sum(2),axis=1))

In [None]:
test['label'] = s6_pred.map(num_to_species)
submission = pd.concat([test['image'], test['label']], axis=1)
submission.head()

In [None]:
submission.to_csv('./submission_6.csv', index=False)
# !kaggle competitions submit -c classify-leaves -f submission_6.csv -m "0, 1, 3, 4, 6, 7, 9 CV98.33"