In [4]:
## in this notebook we extract subimages (the regions annotated with class 'neg') from the main images, using the master_annotations.json file in this folder

## we want to return subimages that are exactly square and in grayscale

## it should be run inside this folder

## You can use this script wh

In [4]:
##import libraries
import os
import cv2
import json
import pandas as pd
import numpy as np

If you have sorted the images by whale using [this script](https://www.kaggle.com/c/noaa-right-whale-recognition/forums/t/16275/python-script-to-sort-images/91274#post91274), leave the `IMAGES_SORTED` variable as `True`. If the files are all in the `imgs` folder, as they would be after unzipping, set it to `False`.

In [5]:
# True: each image is looked up under ../../imgs/<whaleID>/, with the whale ID
# taken from ../Data/train.csv. False: images are read directly from ../../imgs/.
IMAGES_SORTED = True

In [6]:
## read the annotation entries from the JSON file that sits next to this notebook
with open('master_annotations.json', 'rb') as annotations_file:
    master_annotations = json.loads(annotations_file.read())

# the training table is only needed to locate images that were sorted into
# per-whale folders; otherwise it stays None
train_data = pd.read_csv('../Data/train.csv') if IMAGES_SORTED else None
    

In [8]:
## for each set of annotations we want to:
# - make it a square so that height = width
# - first we find out which is lower: height or width
# - we want to increase the lower one to equal the greater one, subject to the constraint that we can't go off the edge of the image
# - in the event that we might go off the edge we increase the height or width as much as possible, and then adjust x or y accordingly

## given an entry, return the subimage

def _fit_span(start, size, limit):
    """Fit a 1-D span of length `size` inside [0, limit), moving `start` as little as possible.

    Returns the (start, size) pair that is actually usable. `size` is capped at
    `limit` when the requested span is longer than the image axis.
    """
    size = min(size, limit)
    # Clamp on both sides: a negative start would wrap around in NumPy slicing,
    # and a start that is too large would push the span past the far edge.
    start = max(0, min(start, limit - size))
    return start, size


def get_subimage(entry):
    """Save a square grayscale crop for every 'neg' annotation of one image.

    Parameters
    ----------
    entry : dict
        One item of master_annotations. Reads entry['filename'] and
        entry['annotations']; each annotation is read for its 'class', 'x',
        'y', 'width' and 'height' keys.

    Returns
    -------
    False when the image cannot be located or read, otherwise None.

    Each crop's shorter side is grown to match the longer one. The box keeps
    its origin when the grown box still fits in the image and is shifted back
    towards the origin when it would cross the right/bottom edge. If the longer
    side exceeds the image's other dimension, the crop is as large as the image
    allows and is therefore not square.

    Crops are written to ../../neg_imgs as <name>_sub.jpg; further 'neg'
    annotations of the same image are written as <name>_sub_1.jpg,
    <name>_sub_2.jpg, ... so they do not overwrite each other.
    """
    target_file = entry['filename']
    prefix = "../../imgs/"
    if IMAGES_SORTED:
        # sorted images live in a subfolder named after the whale ID
        matches = train_data.whaleID[train_data.Image == target_file]
        if matches.empty:
            print('Could not find ' + target_file + ' in train.csv')
            return False
        prefix += matches.iloc[0] + '/'
    print(prefix + target_file)
    full_image = cv2.imread(prefix + target_file, 0)
    # cv2.imread signals failure by returning None; `== None` on an array
    # would be an element-wise comparison, so test identity instead
    if full_image is None:
        print('Could not find ' + target_file)
        return False
    full_height, full_width = full_image.shape

    out_dir = '../../neg_imgs'
    base_name = target_file.replace('.jpg', '')
    saved = 0

    for annotation in entry['annotations']:
        if annotation['class'] != 'neg':
            continue
        sub_height = int(annotation['height'])
        sub_width = int(annotation['width'])
        sub_x = int(annotation['x'])
        sub_y = int(annotation['y'])

        # the square's side is the longer of the two annotated sides
        side = max(sub_height, sub_width)
        if side <= 0:
            # degenerate annotation: nothing to crop
            continue
        sub_x, sub_width = _fit_span(sub_x, side, full_width)
        sub_y, sub_height = _fit_span(sub_y, side, full_height)

        whale = full_image[sub_y:sub_y + sub_height, sub_x:sub_x + sub_width]

        # make an outfolder
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        # the first crop keeps the original name; later ones get a numeric suffix
        suffix = '_sub.jpg' if saved == 0 else '_sub_%d.jpg' % saved
        cv2.imwrite(out_dir + '/' + base_name + suffix, whale)
        saved += 1
                
            
            
                    
        



In [9]:
## this will loop through the master_annotations and use the function above to save a subimage for each annotation of class 'neg'
## it will save them two levels up, in a folder called neg_imgs

for annotated_image in master_annotations:
    # one call per annotated image; the crops are written to disk by get_subimage
    get_subimage(annotated_image)

../../imgs/whale_89615/w_1118.jpg
../../imgs/whale_52749/w_1157.jpg
../../imgs/whale_90957/w_1196.jpg
../../imgs/whale_85464/w_124.jpg
../../imgs/whale_59173/w_1296.jpg
../../imgs/whale_14892/w_1509.jpg
../../imgs/whale_37269/w_1566.jpg
../../imgs/whale_09062/w_1579.jpg
../../imgs/whale_29858/w_1671.jpg
../../imgs/whale_38681/w_1720.jpg
../../imgs/whale_33140/w_1751.jpg
../../imgs/whale_58474/w_1755.jpg
../../imgs/whale_23467/w_188.jpg
../../imgs/whale_90911/w_1941.jpg
../../imgs/whale_05784/w_2026.jpg
../../imgs/whale_98645/w_2069.jpg
../../imgs/whale_15615/w_2115.jpg
../../imgs/whale_95370/w_2197.jpg
../../imgs/whale_28892/w_2304.jpg
../../imgs/whale_27834/w_2352.jpg
../../imgs/whale_67036/w_2390.jpg
../../imgs/whale_06339/w_2547.jpg
../../imgs/whale_81818/w_2627.jpg
../../imgs/whale_03728/w_280.jpg
../../imgs/whale_46747/w_2839.jpg
../../imgs/whale_89456/w_2960.jpg
../../imgs/whale_78785/w_3001.jpg
../../imgs/whale_58474/w_3027.jpg
../../imgs/whale_51538/w_3079.jpg
../../imgs/whale_