In [None]:
import matplotlib.pyplot as plt
import skimage.feature
import pandas as pd
import random
import cv2
from joblib import Parallel, delayed
%matplotlib inline

In [None]:
# Output directories for the sea lion patches, one per data split
SL_TRAIN_PATH, SL_VALIDATION_PATH, SL_TEST_PATH = (
    './data_set/' + split + '/sea_lions/' for split in ('train', 'validation', 'test')
)

# Output directories for the background patches, one per data split
BKG_TRAIN_PATH, BKG_VALIDATION_PATH, BKG_TEST_PATH = (
    './data_set/' + split + '/background/' for split in ('train', 'validation', 'test')
)

In [None]:
# Column types shared by every coordinates CSV file
_COORD_DTYPES = {"coord_x": int, "coord_y": int, "class": str, "filename": str}

# Sea lion coordinates, one frame per data split
sea_lions_df_train = pd.read_csv('./sealions_train.csv', dtype=_COORD_DTYPES)
sea_lions_df_validation = pd.read_csv('./sealions_validation.csv', dtype=_COORD_DTYPES)
sea_lions_df_test = pd.read_csv('./sealions_test.csv', dtype=_COORD_DTYPES)

# Empty (background) coordinates, one frame per data split
empty_df_train = pd.read_csv('./empty_train.csv', dtype=_COORD_DTYPES)
empty_df_validation = pd.read_csv('./empty_validation.csv', dtype=_COORD_DTYPES)
empty_df_test = pd.read_csv('./empty_test.csv', dtype=_COORD_DTYPES)

# Seed Python's `random` module with a fixed value
random.seed(42)

In [None]:
# Save a square patch (144x144 by default) around a sea lion
def save_sea_lion_patch(image, coord_x, coord_y, path, number, patch_size=144):
    """Crop a square patch around a sea lion coordinate and write it as a JPEG.

    The window is centred on (coord_x, coord_y) when it fits. Close to an
    image border it is shifted inwards so that it stays inside the image
    rather than being truncated. If the image itself is smaller than
    ``patch_size`` along an axis, the whole extent of that axis is kept.

    Args:
        image: Image array indexed as [row, column, channel].
        coord_x: Column index of the annotated point.
        coord_y: Row index of the annotated point.
        path: Output prefix; the patch is written to ``path + str(number) + '.jpg'``.
        number: Identifier used as the file name.
        patch_size: Side length of the patch in pixels.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    half = patch_size // 2
    height, width = image.shape[:2]

    # Clamp the top-left corner so the window lies within the image.
    # max(0, ...) also covers images smaller than the patch, where
    # `width - patch_size` would otherwise be a negative (wrapping) index.
    left = max(0, min(coord_x - half, width - patch_size))
    top = max(0, min(coord_y - half, height - patch_size))

    patch = image[top:top + patch_size, left:left + patch_size, :]
    target = path + str(number) + '.jpg'

    # cv2.imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(target, patch):
        raise OSError('Could not write patch to {}'.format(target))
    
    
# Save a square patch (96x96 by default) of background
def save_background_patch(image, coord_x, coord_y, path, number, patch_size=96):
    """Crop a square background patch around a coordinate and write it as a JPEG.

    The window is centred on (coord_x, coord_y) when it fits. Close to an
    image border it is shifted inwards so that it stays inside the image;
    without this a coordinate nearer than half a patch to the top/left edge
    produces a negative slice start, and one near the bottom/right edge
    produces a truncated patch.

    Args:
        image: Image array indexed as [row, column, channel].
        coord_x: Column index of the patch centre.
        coord_y: Row index of the patch centre.
        path: Output prefix; the patch is written to ``path + str(number) + '.jpg'``.
        number: Identifier used as the file name.
        patch_size: Side length of the patch in pixels.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    half = patch_size // 2
    height, width = image.shape[:2]

    # Clamp the top-left corner so the window lies within the image
    left = max(0, min(coord_x - half, width - patch_size))
    top = max(0, min(coord_y - half, height - patch_size))

    patch = image[top:top + patch_size, left:left + patch_size, :]
    target = path + str(number) + '.jpg'

    # cv2.imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(target, patch):
        raise OSError('Could not write patch to {}'.format(target))

In [None]:
def gen(img, path, lst):
    """Yield one argument tuple per entry of ``lst``.

    Each entry of ``lst`` is a pair whose first item is itself a pair
    ``(index, row)`` and whose second item is a number. For every entry the
    generator yields ``(img, row['coord_x'], row['coord_y'], path, number)``.
    """
    for pair in lst:
        indexed_row, number = pair[0], pair[1]
        record = indexed_row[1]
        yield (img, record['coord_x'], record['coord_y'], path, number)
        
        
def extract_sea_lions(sl_df, path, images_dir="./kaggle_sea_lions/Train/", n_jobs=4):
    """Save one sea lion patch per row of ``sl_df``.

    Rows are grouped by ``filename`` so that each source image is read once.
    Patches are numbered consecutively across all images, starting at 0, and
    the number is passed to ``save_sea_lion_patch`` as the file identifier.

    Args:
        sl_df: DataFrame with at least ``filename``, ``coord_x`` and
            ``coord_y`` columns.
        path: Output prefix forwarded to ``save_sea_lion_patch``.
        images_dir: Directory that contains the source images.
        n_jobs: Number of joblib worker threads used per image.

    Raises:
        OSError: If a source image cannot be read.
    """
    import os

    next_number = 0
    for file in sl_df.filename.unique():
        image_path = os.path.join(images_dir, file)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when the read fails
        if image is None:
            raise OSError('Could not read image {}'.format(image_path))

        df = sl_df[sl_df['filename'] == file]
        numbers = range(next_number, next_number + len(df))

        # delayed(f)(*args) must receive the arguments unpacked; mapping
        # delayed(f) over tuples would hand each tuple over as ONE argument.
        Parallel(n_jobs=n_jobs, verbose=1, backend="threading")(
            delayed(save_sea_lion_patch)(image, coord_x, coord_y, path, number)
            for coord_x, coord_y, number in zip(df['coord_x'], df['coord_y'], numbers)
        )
        next_number += len(df)
        

def extract_background(bkg_df, path, images_dir="./kaggle_sea_lions/Train/", n_jobs=4):
    """Save one background patch per row of ``bkg_df``.

    Rows are grouped by ``filename`` so that each source image is read once.
    Patches are numbered consecutively across all images, starting at 0, and
    the number is passed to ``save_background_patch`` as the file identifier.

    Args:
        bkg_df: DataFrame with at least ``filename``, ``coord_x`` and
            ``coord_y`` columns.
        path: Output prefix forwarded to ``save_background_patch``.
        images_dir: Directory that contains the source images.
        n_jobs: Number of joblib worker threads used per image.

    Raises:
        OSError: If a source image cannot be read.
    """
    import os

    next_number = 0
    for file in bkg_df.filename.unique():
        image_path = os.path.join(images_dir, file)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when the read fails
        if image is None:
            raise OSError('Could not read image {}'.format(image_path))

        df = bkg_df[bkg_df['filename'] == file]
        numbers = range(next_number, next_number + len(df))

        # delayed(f)(*args) must receive the arguments unpacked; mapping
        # delayed(f) over tuples would hand each tuple over as ONE argument.
        Parallel(n_jobs=n_jobs, verbose=1, backend="threading")(
            delayed(save_background_patch)(image, coord_x, coord_y, path, number)
            for coord_x, coord_y, number in zip(df['coord_x'], df['coord_y'], numbers)
        )
        next_number += len(df)

In [None]:
# Extract background patches for the train, validation and test splits, in that order
for _bkg_df, _bkg_dir in (
    (empty_df_train, BKG_TRAIN_PATH),
    (empty_df_validation, BKG_VALIDATION_PATH),
    (empty_df_test, BKG_TEST_PATH),
):
    extract_background(_bkg_df, _bkg_dir)

In [None]:
# Extract sea lion patches for the train, validation and test splits, in that order
for _sl_df, _sl_dir in (
    (sea_lions_df_train, SL_TRAIN_PATH),
    (sea_lions_df_validation, SL_VALIDATION_PATH),
    (sea_lions_df_test, SL_TEST_PATH),
):
    extract_sea_lions(_sl_df, _sl_dir)