## Dataset derived from [iWildCam 2021 - Starter Notebook](https://www.kaggle.com/nayuts/iwildcam-2021-starter-notebook#Explanation-for-metadata-file).

I created a 256x256 image dataset for prototyping. Using the MegaDetector detection results for each image, I cropped out the location of the animal. Images with no detection results were resized as is. After the cropping process, the cropped out images were indexed for easy use in learning. A summary of the process is shown below.

# Preparation

In [1]:
import json
import os
import pickle

import cv2
import numpy as np 
import pandas as pd 
from PIL import Image, ImageFile, ImageFont, ImageDraw

In [2]:
TRAIN_PATH = "./data/train/"
TEST_PATH = "./data/test/"
ANNOTATIONS_PATH = "./data/metadata/"

CROPED_TRAIN_PATH = "./data/crop_train/"
CROPED_TEST_PATH = "./data/crop_test/"

threshold = 0.9

In [3]:
with open(ANNOTATIONS_PATH + 'iwildcam2021_megadetector_results.json', encoding='utf-8') as json_file:
    megadetector_results =json.load(json_file)

In [4]:
# os.mkdir(CROPED_TRAIN_PATH)
# os.mkdir(CROPED_TEST_PATH)

In [5]:
with open('./data/metadata/iwildcam2021_train_annotations.json', encoding='utf-8') as json_file:
    train_annotations =json.load(json_file)

In [6]:
annotations = megadetector_results["images"]

In [7]:
def get_crop_area(bbox, image_size):
    x1, y1,w_box, h_box = bbox
    ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
    area = (xmin * image_size[0], ymin * image_size[1], 
            xmax * image_size[0], ymax * image_size[1])
    return area

In [8]:
img_ids_train = []
img_idx_train = []
img_ids_test = []
img_idx_test = []

x_tot_list,x2_tot_list = [],[]

In [9]:
def save_image(img, img_id, idx, is_train):  
    if is_train:
        img.save( CROPED_TRAIN_PATH + f"{img_id}_{idx}.jpg", format="jpeg")
        img_ids_train.append(f"{img_id}")
        img_idx_train.append(idx)
    else:
        img.save( CROPED_TEST_PATH + f"{img_id}_{idx}.jpg", format="jpeg")
        img_ids_test.append(f"{img_id}")
        img_idx_test.append(idx)

In [10]:
def calc_x_and_x2_tot(img):
    
    img = np.array(img, dtype=np.uint8)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img,(2,0,1))
            
    return (img/255.0).reshape(-1,3).mean(0), ((img/255.0)**2).reshape(-1,3).mean(0)

## main processing module↓

In [13]:
def convert_images(annotation):

    size = (256,256)
    img_id = annotation["id"]
    is_train = True 
    
    try:
        detections = annotation["detections"]
    except:
        print(f"Passed {img_id}. There are no detection data.")
        return
    
    path_for_train = TRAIN_PATH + annotation["id"] + ".jpg"
    path_for_test = TEST_PATH + annotation["id"] + ".jpg"
    
    if os.path.exists(path_for_train):
        file_path = path_for_train
    elif os.path.exists(path_for_test):
        file_path = path_for_test
        is_train = False
    else:
        print(f"Passed {img_id}. There are no data.")
        return
    
    
    try:      
        img = Image.open(file_path)
    except:
        print(f"Passed {img_id}. Fail to open image.")
        print(f"pass {file_path}.")
        return
    
    for i, detection in enumerate(detections, 1):
        
        if detection["conf"] < threshold:
            continue

        if detection["category"] != "1":
            continue
            
        if len(detection) == 0:
            img = img.resize(size)
            save_image(img, img_id, 0, is_train)
            
            x_tot, x2_tot = calc_mean_and_var(img)
            x_tot_list.append(x_tot)
            x2_tot_list.append(x2_tot)
            
        else:
            crop_area = get_crop_area(detection["bbox"], img.size)
            img_cropped = img.crop(crop_area).resize(size)
            save_image(img_cropped, img_id, i, is_train)
        
            x_tot, x2_tot = calc_x_and_x2_tot(img_cropped)
            x_tot_list.append(x_tot)
            x2_tot_list.append(x2_tot)

# Cropping

Crop by calling convert_images fuction.

In [14]:
for annotation in annotations:
    convert_images(annotation)

Passed 8e7320aa-21bc-11ea-a13a-137349068a90. Fail to open image.
pass ./data/train/8e7320aa-21bc-11ea-a13a-137349068a90.jpg.


The mean and variance for standardization.

In [15]:
#image stats
img_avr =  np.array(x_tot_list).mean(0)
img_std =  np.sqrt(np.array(x2_tot_list).mean(0) - img_avr**2)
print('mean:',img_avr, ', std:', img_std)

mean: [0.37087523 0.370876   0.3708759 ] , std: [0.21022698 0.21022713 0.21022706]


## zipping

In [None]:
!zip croped_images_train -r ./croped_images_train
!zip croped_images_test -r ./croped_images_test

In [None]:
!rm -r ./croped_images_train
!rm -r ./croped_images_test

# Creating csv file for reference

In [16]:
cropped_train = {"id": img_ids_train, "idx":img_idx_train}
df_cropped_train = pd.DataFrame(cropped_train)
df_train_annotation = pd.DataFrame(train_annotations["annotations"])

In [17]:
df_cropped_train = df_cropped_train.merge(
    df_train_annotation[["image_id", "category_id"]], 
    left_on='id', right_on='image_id')[["id", "idx", "category_id"]]

In [18]:
df_cropped_train.head()

Unnamed: 0,id,idx,category_id
0,905a3c8c-21bc-11ea-a13a-137349068a90,1,374
1,905a3c8c-21bc-11ea-a13a-137349068a90,1,374
2,905a4416-21bc-11ea-a13a-137349068a90,1,97
3,905a4416-21bc-11ea-a13a-137349068a90,2,97
4,905a4416-21bc-11ea-a13a-137349068a90,3,97


In [19]:
cropped_test = {"id": img_ids_test, "idx":img_idx_test}
df_cropped_test = pd.DataFrame(cropped_test)

In [20]:
df_cropped_test.head()

Unnamed: 0,id,idx
0,915879a0-21bc-11ea-a13a-137349068a90,1
1,91588116-21bc-11ea-a13a-137349068a90,1
2,9158a2f4-21bc-11ea-a13a-137349068a90,1
3,9158aaa6-21bc-11ea-a13a-137349068a90,1
4,9158f1a0-21bc-11ea-a13a-137349068a90,1


In [21]:
df_cropped_train.to_csv("./data/cropped_train.csv", index=False)
df_cropped_test.to_csv("./data/cropped_test.csv", index=False)