Use the functions below to generate a list of files for annotation 

The code in the cell below should be run within this (RightWhaleID/ImageAnnotation) folder. When run it will get a random batch of 100 images to be annotated. It will place those images in a folder (`files_for_annotation`) two levels up (alongside the `imgs` folder). After you have run this script, annotate the images using the sloth program. When you are done, use the `check_annotations` folder below to make sure that your annotations are correct. When you are happy with the json file that you created, give it a name (like `annotations_kevin_1.json`) and submit a pull request to have it included with the others.

<h5>Please note that the code below assumes that you have run the script located [here](https://www.kaggle.com/c/noaa-right-whale-recognition/forums/t/16275/python-script-to-sort-images) to place the training images into folders for individual whales. The sorting script should be run at the same level as the `imgs` folder. The 
<br>


In [1]:
from PIL import Image, ImageDraw
import os
import json
import pandas as pd
import random
import shutil
from pprint import pprint

In [2]:
# load all the json files in the annotations folder and make a list of the files that have been annotated
files_to_search = [x for x in os.listdir("annotations/") if '.json' in x]



#this function takes in a json file of annotations and returns the set of images that are in it

def get_annotated_images(infile):
    annotated_images = set()
    with open("annotations/" + infile) as json_in:
        annotations = json.load(json_in)
        for annotation in annotations:
            if annotation['annotations'] == []:
                continue
            filename = annotation['filename'][annotation['filename'].rfind('/') + 1:]
            annotated_images.add(filename)
            
    return annotated_images


# build the list of images that have already been annotated
already_done = set()
for json_file in files_to_search:
    already_done = already_done.union(get_annotated_images(json_file)) 
    
print("Images annotated so far . . . " + str(len(already_done)))
            
            
#get list of all images
train_data = pd.read_csv('../Data/train.csv')
all_images = set(train_data.Image)

# make a list of all files that still require annotation
candidate_images = all_images - already_done
print("Images still requiring annotation . . . " + str(len(candidate_images)))

#select a random 100 files for annotation (do get a larger or smaller batch, change this number)
num_files_to_annotate = 100
images_to_annotate = random.sample(candidate_images, num_files_to_annotate)

# This function takes in a list of images to be annotated and moves them to a temporary folder to be used with sloth
def get_images_for_annotation(images):
    if not os.path.exists('../../files_for_annotation/'):
        os.makedirs('../../files_for_annotation/')
    # clear out any prexisiting images 
    for jpg in os.listdir('../../files_for_annotation/'):
        os.remove('../../files_for_annotation/' + jpg)
    for image_name in images:
        #look up the whale name
        whale = train_data.whaleID[train_data.Image == image_name]
        whale_name = whale.iloc[0]
        #get filename and path of source - create a folder one level above the git repo
        infile = '../../imgs/' + whale_name + '/' + image_name
        #check for annoations folder, if it's not there, create it
        
                
        outfile = '../../files_for_annotation/' + image_name
        
        shutil.copy(infile, outfile)
        
get_images_for_annotation(images_to_annotate)

Images annotated so far . . . 363
Images still requiring annotation . . . 4181


In [15]:
## use this to go over old annotations
get_images_for_annotation(get_annotated_images('whale_05661.json'))

In [23]:
files_to_search

['whale_03728.json',
 'whale_03623.json',
 'allan_visochek_2.json',
 'whale_03227.json',
 'whale_02411.json',
 'allan_visochek_3.json',
 'allan_visochek_1.json~',
 'whale_05661.json',
 'allan_visochek_1.json']

The function below is used to proofread annotations that you have made. It will create a group of images using your annotations to draw <span style="color:yellow">yellow</span> boxes around the whale heads and <span style="color:red">red</span> boxes around negative areas (that don't contain whales).

In [3]:
# a function to generated annotated images from a json file of whale annotations

# this function assumes that there is an imgs folder two levels up and that the images in that folder have been sorted
# with the script found here:  https://www.kaggle.com/c/noaa-right-whale-recognition/forums/t/16275/python-script-to-sort-images/91274#post91274


#make sure you have loaded the global 
def check_annotations(jsonfile):
    train_data = pd.read_csv('../Data/train.csv')
    #open the json
    with open(jsonfile, 'rb') as infile:
        annotations = json.load(infile)
        for annotation in annotations:
            #if the annotation is empty, continue
            if annotation['annotations'] == []:
                continue
            #open the image file and create a ImageDraw.Draw object
            filename = annotation['filename'][annotation['filename'].rfind('/') + 1:]
            whale = train_data.whaleID[train_data.Image == filename]
            whale_folder = whale.iloc[0]
            im = Image.open("../../imgs/" + whale_folder + '/' + filename)
            draw = ImageDraw.Draw(im)
            for item in annotation['annotations']:
                #set line color
                if item['class'] == 'Blowholes':
                    line_color = "yellow"
                elif item['class'] == 'TipOfHead':
                    line_color = "red"
                else:
                    continue
                #draw the rectangle
                x,y = item['x'], item['y']
                for i in range(100): #this controls the width of the line drawn
                    draw.polygon([(x,y-10),(x+10,y),(x,y+10),(x-10,y)],fill= line_color)
                    
            #check for the directory, create it if it isn't there.
            if not os.path.exists('../../marked_images/'):
                os.makedirs('../../marked_images/')
            im.save('../../marked_images/marked_' + filename)
            
    
    
    
    
    

In [5]:
check_annotations('annotations/allan_visochek_4.json')

In [14]:
## if you find a faulty entry while proofreading, you can use this function to remove the entry from your json and put the image back in the queue
def delete_entry(image, jsonfile):
    with open(jsonfile, 'rb') as infile:
        json_in = json.load(infile)
        
        new_json = [annotation for annotation in json_in if annotation['filename'][annotation['filename'].rfind('/') + 1:] != image]
        
    with open(jsonfile, 'wb') as outfile:
        json.dump(new_json, outfile)
        
                

In [9]:
## find the specific json file (or files) with annotations for the given image

def find_json(image):
    jsons = [json_file for json_file in os.listdir('annotations') if '.json' in json_file]
    for jfile in jsons:
        with open('annotations/' + jfile) as infile:
            json_in = json.load(infile)
            for x in json_in:
                if x['filename'][x['filename'].rfind('/') + 1:] == image:
                    pprint(x)
                    pprint(jfile)
                    
                                                     
                                                     

In [28]:
for x in ['w_39.jpg']:
    find_json(x)

{u'annotations': [{u'class': u'neg',
                   u'height': 483.4070981210857,
                   u'type': u'rect',
                   u'width': 461.7620041753653,
                   u'x': 1875.9081419624217,
                   u'y': 1024.5344467640919}],
 u'class': u'image',
 u'filename': u'../../../files_for_annotation/w_39.jpg'}
'burnham11.json'


In [29]:
delete_entry('w_39.jpg', 'annotations/burnham11.json')

In [22]:
## this function will create a master json file from all of the json files in the annotations folder

def make_master_json():
    master_json = []
    images_added = []
    # make list of all .json file in annotations folder
    jsons = [json_doc for json_doc in os.listdir('annotations/') if '.json' in json_doc]
    for entry in jsons:
        
        #import the json
        with open('annotations/' + entry, 'rb') as infile:
            json_in = json.load(infile)
            for image in json_in:
                if image['annotations'] == []:
                    continue
                
                filename = image['filename'][image['filename'].rfind('/') + 1:]
                if filename not in images_added:
                    master_json.append(image)
                    images_added.append(filename)
                else:
                    
                    #if it has more negs than the other entry add this one and delete the other. Otherwise, skip it
                    neg_count = 0
                    for item in image['annotations']:
                        if item['class'] == 'neg':
                            neg_count += 1
                    if neg_count == 0:
                        continue
                    else:
                        #look up the other one
                        for annotation in master_json:
                            old_neg_count = 0
                            old_entry = annotation['filename']
                            for item in image['annotations']:
                                if item['class'] == 'neg':
                                    old_neg_count += 1
                        if neg_count > old_neg_count:
                            #remove the old one
                            master_json.remove(annotation)
                            master_json.append(image)
                            
                                                     
    return master_json

    

In [30]:
master = make_master_json()
with open('master_annotations.json', 'w') as outfile:
    json.dump(master, outfile)

1024

In [44]:
bg_txt

NameError: name 'bg_txt' is not defined