## Sample solution part of [iWildCam 2021 - Starter Notebook](https://www.kaggle.com/nayuts/iwildcam-2021-starter-notebook).

This approach has not beaten the `kaggle_sample_all_zero_iwildcam_2021.csv` baseline yet, but I am publishing the idea anyway.

1. First we crop the image based on the bbox detected by MegaDetector.
2. In the training data, the correct answer labels are given as annotations, so we can use them to train the model.
3. Classify the cropped images of the test data with the trained model.
4. Within each image burst (sequence), we pick the image with the highest total animal count and submit the species and per-species counts predicted for that image.

Cropping is time consuming, so I did it on [a different notebook](https://www.kaggle.com/nayuts/256-x-256-cropped-images). This notebook is also available to the public.

<img src="https://raw.githubusercontent.com/tasotasoso/kaggle_media/main/iwildcam2021/model_image.png" width="300">

In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import collections
import gc
import json
import os
import random
import time
import warnings
warnings.simplefilter("ignore")

from albumentations import *
from albumentations.pytorch import ToTensor
import cv2
from imblearn.under_sampling import RandomUnderSampler
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image, ImageFilter
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import tifffile as tiff
import timm
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset, sampler
from tqdm import tqdm_notebook as tqdm

%matplotlib inline

### setting

In [None]:
!ls ../input/256-x-256-cropped-images

In [None]:
DATASET = "../input/iwildcam2021-fgvc8"
CROPED_DATA = "../input/256-x-256-cropped-images/"

TRAIN_CROPED_DATA = "croped_images_train/"
TEST_CROPED_DATA = "croped_images_test/"

In [None]:
BATCH_SIZE = 32
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 300
NUM_WORKERS = 4
SEED = 2021

In [None]:
def set_seed(seed=2**3):
    """Seed every random number generator used by this notebook.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and every visible CUDA device).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every GPU, not only the currently selected one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # With benchmarking enabled cuDNN may select different kernels between
    # runs, so it is switched off alongside the deterministic flag.
    torch.backends.cudnn.benchmark = False
set_seed(SEED)

In [None]:
df_croped_img_ids_train = pd.read_csv(CROPED_DATA + "croped_train.csv")
df_croped_img_ids_test = pd.read_csv(CROPED_DATA + "croped_test.csv")

In [None]:
df_croped_img_ids_train.head()

In [None]:
df_croped_img_ids_test.head()

### create train dataframe

In [None]:
with open('../input/iwildcam2021-fgvc8/metadata/iwildcam2021_train_annotations.json', encoding='utf-8') as json_file:
    train_annotations =json.load(json_file)
df_train_annotation = pd.DataFrame(train_annotations["annotations"])

In [None]:
train = df_croped_img_ids_train[["id", "idx"]].merge(df_train_annotation[["image_id", "category_id"]], 
                                      left_on='id', right_on='image_id')[["id", "idx", "category_id"]]

In [None]:
df_categories = pd.DataFrame(train_annotations["categories"])

In [None]:
# Category ids in the order they appear in the annotation file.
cat_idxs = df_categories["id"]


def convert_cat_to_index(x):
    """Return the position of category id ``x`` inside ``cat_idxs``.

    Raises:
        IndexError: If ``x`` does not occur in ``cat_idxs``.
    """
    positions = np.flatnonzero(cat_idxs == x)
    return positions[0]

In [None]:
train["category_id"] = train["category_id"].map(lambda x: convert_cat_to_index(x))

In [None]:
train.head()

### unzip croped data

In [None]:
! unzip ../input/256-x-256-cropped-images/croped_images_train.zip 

In [None]:
! unzip ../input/256-x-256-cropped-images/croped_images_test.zip

# Train

## Create dataset for training

In [None]:
# ====================================================
# Dataset for train
# ====================================================

# Per-channel statistics used to normalise images after scaling to [0, 1].
mean = np.array([0.37087523, 0.370876, 0.3708759])
std = np.array([0.21022698, 0.21022713, 0.21022706])


def img2tensor(img, dtype: np.dtype = np.float32):
    """Convert a NumPy image into a channel-first torch tensor.

    Args:
        img: Array laid out as (H, W, C), or (H, W) for a single channel.
        dtype: NumPy dtype the data is cast to before wrapping.

    Returns:
        A tensor laid out as (C, H, W).
    """
    if img.ndim == 2:
        # Give single-channel images an explicit trailing channel axis.
        img = img[:, :, np.newaxis]
    channel_first = img.transpose(2, 0, 1)
    return torch.from_numpy(channel_first.astype(dtype, copy=False))

class IWildcamTrainDataset(Dataset):
    """Training dataset of cropped images together with their category index.

    Each row of ``df`` refers to the file
    ``TRAIN_CROPED_DATA + "{id}_{idx}.jpg"``.

    Args:
        df: DataFrame with ``id``, ``idx`` and ``category_id`` columns.
        tfms: Optional augmentation callable, invoked as ``tfms(image=img)``
            and expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, tfms=None):
        # Plain arrays are indexed by position, so the dataset works for any
        # DataFrame index (a pandas Series would be looked up by label).
        self.ids = df["id"].to_numpy()
        self.idxs = df["idx"].to_numpy()
        self.categories = df["category_id"].to_numpy()
        self.tfms = tfms

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(normalised CHW image tensor, category tensor)``.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        size = (256, 256)
        image_id = self.ids[idx]
        image_idx = self.idxs[idx]
        image_category = self.categories[idx]

        image_path = TRAIN_CROPED_DATA + f"{image_id}_{image_idx}.jpg"
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.resize(cv2.cvtColor(raw, cv2.COLOR_BGR2RGB), size)

        if self.tfms is not None:
            augmented = self.tfms(image=img)
            img = augmented['image']

        # Normalisation happens after augmentation, on the [0, 1]-scaled image.
        return img2tensor((img/255.0 - mean)/std), torch.tensor(image_category)

In [None]:
def get_aug(p=1.0):
    """Build the augmentation pipeline used for training images.

    Args:
        p: Forwarded to ``Compose`` as its ``p`` argument.

    Returns:
        A ``Compose`` of horizontal flip, shift/scale/rotate and
        brightness/contrast transforms.
    """
    geometric = ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.2,
        rotate_limit=15,
        p=0.9,
        border_mode=cv2.BORDER_REFLECT,
    )
    photometric = RandomBrightnessContrast(p=0.9)
    pipeline = [HorizontalFlip(), geometric, photometric]
    return Compose(pipeline, p=p)

## Create model

In [None]:
# ====================================================
# EfficientNet Model
# ====================================================

class enet_v2(nn.Module):
    """timm backbone whose built-in classifier is swapped for a new linear head.

    Args:
        backbone: Model name passed to ``timm.create_model``.
        out_dim: Number of output units of the new head.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, backbone, out_dim, pretrained=False):
        super().__init__()
        self.enet = timm.create_model(backbone, pretrained=pretrained)
        # Size the new head from the original classifier before removing it.
        feature_dim = self.enet.classifier.in_features
        self.myfc = nn.Linear(feature_dim, out_dim)
        self.enet.classifier = nn.Identity()

    def forward(self, x):
        """Run the backbone, then the linear head, and return its output."""
        return self.myfc(self.enet(x))

In [None]:
model = enet_v2(backbone="tf_efficientnet_b0", out_dim=205)
model.to(DEVICE)

## train setting

In [None]:
# ====================================================
# Optimizer and Loss
# ====================================================

optimizer = torch.optim.Adam([{'params': model.parameters(), 'lr': 1e-4}])
criterion = nn.CrossEntropyLoss()

## Train

Since we know that [the training data is imbalanced](https://www.kaggle.com/nayuts/iwildcam-2021-overviewing-for-start#EDA), I undersampled it.

In [None]:
# A RandomState *instance* is passed instead of the bare integer seed: with an
# integer the sampler re-seeds on every fit_resample call and returns the
# identical subset each time, so "train" and "test" would be the same rows and
# would never change between epochs.
rus = RandomUnderSampler(random_state=np.random.RandomState(SEED), replacement=True)


def generate_dataloders(train):
    """Draw two class-balanced subsets of ``train`` and wrap them in loaders.

    Args:
        train: DataFrame with ``id``, ``idx`` and ``category_id`` columns.

    Returns:
        ``(dl_train, dl_test)``: an augmented, shuffled training loader and an
        un-augmented evaluation loader.
    """
    # NOTE(review): both subsets are sampled from the same pool, so the
    # "test" loader is not a true hold-out set and its loss will look
    # optimistic. Consider splitting `train` before under-sampling.
    train_resampled, _ = rus.fit_resample(train, train["category_id"])
    test_resampled, _ = rus.fit_resample(train, train["category_id"])

    train_resampled = train_resampled.reset_index(drop=True)
    test_resampled = test_resampled.reset_index(drop=True)

    ds_train = IWildcamTrainDataset(train_resampled, tfms=get_aug())
    # The under-sampler returns rows grouped by class; without shuffling each
    # batch would hold (almost) a single class.
    dl_train = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    ds_test = IWildcamTrainDataset(test_resampled)
    dl_test = DataLoader(ds_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    return dl_train, dl_test

In [None]:
# ====================================================
# Train
# ====================================================

for epoch in tqdm(range(EPOCHS)):

    # Dataloaders are rebuilt (re-running the under-sampling) every epoch.
    dl_train, dl_test = generate_dataloders(train)

    ### Train
    model.train()
    train_loss = 0

    for imgs, categories in dl_train:
        optimizer.zero_grad()
        imgs = imgs.to(DEVICE)
        categories = categories.to(DEVICE)

        outputs = model(imgs)

        loss = criterion(outputs, categories)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    # Mean loss per batch.
    train_loss /= len(dl_train)

    print(f"EPOCH: {epoch + 1}, train_loss: {train_loss}")

    ### Validation
    model.eval()
    valid_loss = 0

    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for imgs, categories in dl_test:
            imgs = imgs.to(DEVICE)
            categories = categories.to(DEVICE)

            outputs = model(imgs)

            loss = criterion(outputs, categories)

            valid_loss += loss.item()
    valid_loss /= len(dl_test)

    print(f"EPOCH: {epoch + 1}, valid_loss: {valid_loss}")

    # Checkpoint every 50 epochs and always after the final epoch.
    if (epoch+1)%50 == 0 or (epoch+1)%EPOCHS == 0:
        ###Save model
        torch.save(model.state_dict(), f"{epoch+1}_.pth")

# Inference

## Create dataset for test

In [None]:
# ====================================================
# Dataset for test
# ====================================================

mean = np.array([0.37087523, 0.370876, 0.3708759] )
std = np.array([0.21022698, 0.21022713, 0.21022706])

class IWildcamTestDataset(Dataset):
    """Inference dataset of cropped test images.

    Each row of ``df`` refers to the file
    ``TEST_CROPED_DATA + "{id}_{idx}.jpg"``.

    Args:
        df: DataFrame with ``id`` and ``idx`` columns.
        tfms: Optional augmentation callable, invoked as ``tfms(image=img)``
            and expected to return a mapping with an ``"image"`` entry.
    """

    def __init__(self, df, tfms=None):
        # Plain arrays are indexed by position, so the dataset works for any
        # DataFrame index (a pandas Series would be looked up by label).
        self.ids = df["id"].to_numpy()
        self.idx = df["idx"].to_numpy()
        self.tfms = tfms

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(normalised CHW image tensor, image id)``.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        size = (256, 256)
        image_id = self.ids[idx]
        image_idx = self.idx[idx]

        image_path = TEST_CROPED_DATA + f"{image_id}_{image_idx}.jpg"

        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.resize(cv2.cvtColor(raw, cv2.COLOR_BGR2RGB), size)

        if self.tfms is not None:
            augmented = self.tfms(image=img)
            img = augmented['image']

        # Same normalisation as used for the training crops.
        return img2tensor((img/255.0 - mean)/std), image_id

In [None]:
ds_test = IWildcamTestDataset(df_croped_img_ids_test)
dl_test = DataLoader(ds_test,batch_size=32,shuffle=False,num_workers=NUM_WORKERS)

## Load trained model

In [None]:
model = enet_v2(backbone="tf_efficientnet_b0", out_dim=205)
# The training loop always writes a checkpoint after the final epoch, named
# after EPOCHS; using the constant avoids depending on the leaked loop variable.
# map_location lets a checkpoint saved on GPU load in a CPU-only session.
model.load_state_dict(torch.load(f"{EPOCHS}_.pth", map_location=DEVICE))
model.to(DEVICE)
model.eval()

In [None]:
pred_categories = []
pred_img_ids = []

## inference

In [None]:
# Predict one category per cropped image and remember which source image each
# crop came from.
with torch.no_grad():
    for batch_imgs, batch_ids in tqdm(dl_test):
        logits = model(batch_imgs.to(DEVICE))
        pred_categories.extend(logits.argmax(dim=1).tolist())
        pred_img_ids.extend(batch_ids)

In [None]:
# Group the per-crop predictions by source image id: one list of predicted
# category indices per image, one entry per crop of that image.
pred = collections.defaultdict(list)
for category, img_id in zip(pred_categories, pred_img_ids):
    pred[img_id].append(category)

In [None]:
pred

# Create submit file

In [None]:
sub = pd.read_csv("../input/iwildcam2021-fgvc8/sample_submission.csv")
col_Predicted = [col for col in sub.columns if "Predicted" in col]

In [None]:
with open('../input/iwildcam2021-fgvc8/metadata/iwildcam2021_train_annotations.json', encoding='utf-8') as json_file:
    train_annotations =json.load(json_file)
df_categories = pd.DataFrame.from_records(train_annotations["categories"])

For each image, count the number of each animal species and store them in the corresponding column.

In [None]:
# One row per image: the image id followed by a count for each category index.
results = []

for img_id, labels in pred.items():
    per_category = [0] * 205
    for category, cnt in collections.Counter(labels).items():
        per_category[category] = cnt
    # Index 0 is dropped so the row matches the submission columns
    # (presumably the "empty" category -- TODO confirm).
    results.append([img_id] + per_category[1:])

Convert to pandas dataframe.

In [None]:
sub_tmp = pd.DataFrame(results, columns=sub.columns)

In [None]:
sub_tmp.head()

In [None]:
sub_tmp.to_csv("./sub_tmp.csv", index=False)

Add seq_id information to the counted results. iwildcam2021_test_information.json contains the mapping between the id of the image and the id of the sequence.

In [None]:
with open('../input/iwildcam2021-fgvc8/metadata/iwildcam2021_test_information.json', encoding='utf-8') as json_file:
    test_information =json.load(json_file)
    
df_test_info = pd.DataFrame(test_information["images"])[["id", "seq_id"]]
df_test_info.head()

Take right join on the image id.

In [None]:
sub_tmp = sub_tmp.merge(df_test_info, left_on="Id", right_on="id", how="right")

In [None]:
sub_tmp.head()

Since there are multiple rows for the same sequence ID, we need to aggregate them into a single row. Here we choose the image with the highest total number of animals and submit the species and counts predicted for that image.

In [None]:
# Total number of predicted animals per row, computed in one vectorised pass.
# skipna=False keeps NaN for rows without any prediction, exactly as an
# element-wise Python sum over the row would.
sum_counts = sub_tmp[col_Predicted].sum(axis=1, skipna=False).tolist()

In [None]:
sub_tmp["total"] = sum_counts
# Highest totals first (NaN totals sort last), so keeping the first row of
# each sequence selects the image with the most predicted animals.
sub_tmp = sub_tmp.sort_values('total', ascending=False)
# Fill with the integer 0 (not the string "0") so the columns stay numeric.
sub_tmp = sub_tmp.drop_duplicates(subset='seq_id', keep='first').fillna(0)
# The right join introduced NaN and turned the counts into floats; write
# them back as integers for the submission file.
sub_tmp[col_Predicted] = sub_tmp[col_Predicted].astype(int)

In [None]:
sub_tmp

I'll match the result to the sample submission format. I was told that the order of the rows is not related to the score, but we will match it just in case.

In [None]:
# Since it was difficult to join the pandas series, I intentionally created an extra column.
sub = sub.reset_index()
sub = sub[["index", "Id"]].merge(sub_tmp, left_on="Id", right_on="seq_id")

In [None]:
sub = sub[["Id_x"] + col_Predicted].rename(columns={"Id_x": "Id"})
sub.to_csv("sub.csv", index=False)

In [None]:
sub.head()

In [None]:
#If we don't delete them, csv files are buried and cannot be retrieved.
!rm -r croped_images_train
!rm -r croped_images_test