In [None]:
from azure.storage.file import FileService
import json
import os
from PIL import Image, ImageDraw
import pandas as pd
import re
import numpy as np
from eMammal_helpers import draw_bboxes

In [None]:
# Display the value of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Load the autoreload extension in mode 2, so imported Python modules are reloaded
# before code is executed and edits to them are picked up without a kernel restart
%load_ext autoreload
%autoreload 2

# Check the annotations from iMerit

Download the annotation file from the Azure File Share to a local temp folder - **do this only once**

In [None]:
# The storage account key is read from the environment; raises KeyError if AZ_STORAGE_KEY is not set
key = os.environ["AZ_STORAGE_KEY"]
# File service client for the 'ai4edevshare' storage account
file_service = FileService(account_name='ai4edevshare', account_key=key)

In [None]:
# Local path of the annotation file; its base name is also the name of the file fetched from the share
# NOTE(review): absolute, user-specific path - adjust before running on another machine
annotation_path = '/home/yasiyu/yasiyu_temp/eMammal_annotations/microsoft_batch6_29Sept2018.json'

In [None]:
# Download the annotation file from the 'ai4edevfs' share only when no local copy exists,
# so that re-running the notebook does not fetch it again ("do once").
# Delete the local file to force a fresh download.
if not os.path.exists(annotation_path):
    file_service.get_file_to_path(
        'ai4edevfs',                         # share name
        'annotations/incoming_annotations',  # directory inside the share
        os.path.basename(annotation_path),   # name of the file inside that directory
        annotation_path,                     # local destination
    )

### Read in the annotations.

In [None]:
# Load the annotation file as a list of raw text lines; each line is parsed with json.loads later on
with open(annotation_path, 'r') as annotation_file:
    content = list(annotation_file)

len(content)  # number of lines, noted as 1848 by the original author; judging by the file name, all appear to be eMammal

In [None]:
# Parse and display the first line to inspect the structure of one entry
json.loads(content[0])

In [None]:
# Split the entries by dataset, decided by the file name of each entry's first image.
emammal = []            # first-image file name of every eMammal entry
others = []             # first-image file name of every other entry
emammal_num_images = 0  # total number of images over all eMammal entries
# image_ids that carry at least one annotation with category_id 3
# (presumably the 'group' category - confirm against entry['categories']).
# This has to be a set: .add() does not exist on a list, and a set also
# de-duplicates images that carry several such annotations.
emammal_images_with_groups = set()

for row in content:
    entry = json.loads(row)
    assert len(entry['images']) > 0

    file_name = entry['images'][0]['file_name']
    if file_name.startswith('datasetemammal.'):
        emammal.append(file_name)
        emammal_num_images += len(entry['images'])

        for bbox in entry['annotations']:
            if bbox['category_id'] == 3:
                emammal_images_with_groups.add(bbox['image_id'])
    else:
        others.append(file_name)

In [None]:
len(emammal)  # each entry is one sequence, so length should be the number of sequences
emammal_num_images  # total number of images summed over the eMammal entries
len(others)  # number of entries whose first image is not an eMammal image

In [None]:
# image_ids collected from eMammal annotations with category_id 3
emammal_images_with_groups

In [None]:
# Consistency checks over all entries:
#  - count the entries whose number of annotations differs from their number of images
#  - print the categories of any entry that does not list exactly 3 of them
diff_anno_img_count = 0
for row in content:
    entry = json.loads(row)
    diff_anno_img_count += int(len(entry['annotations']) != len(entry['images']))
    categories = entry['categories']
    if len(categories) != 3:  # since Sept 29 batch, new category 'group' added
        print('categories are {}'.format(categories))
diff_anno_img_count

In [None]:
# Flatten the annotations and images of all entries into two dataframes,
# so that all annotations for a given image can be selected
entries = [json.loads(row) for row in content]
annotations = [anno for entry in entries for anno in entry['annotations']]
images = [img for entry in entries for img in entry['images']]

df_anno = pd.DataFrame(annotations)
df_img = pd.DataFrame(images)

In [None]:
# Look at 10 randomly chosen annotation rows
df_anno.sample(n=10)

In [None]:
# image_id of the first annotation row; the spot-check loop compares image_id with the image file_name
df_anno.iloc[0].image_id

In [None]:
# Look at 5 randomly chosen image rows
df_img.sample(n=5)

In [None]:
len(df_anno)  # number of annotation rows
len(df_img)  # some images had no annotations aka empty

### Visualize the bboxes on a few images to spot check.

Visualize 100 random images from the returned batch. Not all images have annotations.

In [None]:
# Directory for saved visualizations; currently only referenced by a commented-out save call
# NOTE(review): absolute, user-specific path - adjust before running on another machine
viz_output_dir = '/home/yasiyu/yasiyu_temp/viz_output'

In [None]:
# Draw 100 random image rows to spot check; no random_state is passed, so the sample changes on every run
sample_img = df_img.sample(n=100)

In [None]:
labeled_images = []        # results of draw_bboxes for sampled images that have annotations
no_annotation_images = []  # sampled images without annotations, opened as-is

# Raw string, so that `\.` reaches the regex engine as an escaped dot instead of being
# an invalid string escape sequence.
# the dash between seq and frame is different among the batches
pattern = re.compile(r'^datasetemammal\.project(.+?)\.deployment(.+?)\.seq(.+?)[-_]frame(.+?)\.img(.+?)\.')

for file_name in sample_img['file_name']:
    match = pattern.match(file_name)
    if match is None:
        # Without this guard a single unexpected file name aborts the whole loop
        print('File name {} does not match the expected eMammal pattern.'.format(file_name))
        continue
    project_id, deployment_id, seq_id, frame_order, image_id = match.group(1, 2, 3, 4, 5)

    # The extension on disk may be lower or upper case: prefer .jpg, fall back to .JPG
    img_path1 = '/datadrive/emammal/{}{}/{}.jpg'.format(project_id, deployment_id, image_id)
    img_path2 = '/datadrive/emammal/{}{}/{}.JPG'.format(project_id, deployment_id, image_id)
    img_path = img_path1 if os.path.exists(img_path1) else img_path2

    annos_i = df_anno.loc[df_anno['image_id'] == file_name, :]  # all annotations on this image

    if len(annos_i) > 0:
        bboxes = list(annos_i.loc[:, 'bbox'])
        classes = list(annos_i.loc[:, 'category_id'])

        if not os.path.exists(img_path):
            print('Image {} cannot be found at the path.'.format(img_path))
            continue
        labeled_img = draw_bboxes(img_path, bboxes, classes, show_label=True)
        #labeled_img.save(os.path.join(viz_output_dir, '{}_gtbbox.png'.format(file_name.lower().split('.jpg')[0])))
        labeled_images.append(labeled_img)
    else:
        print('No annotations found for image {}.'.format(file_name))
        if not os.path.exists(img_path):
            print('! Image also cannot be found.')
            continue
        no_annotation_images.append(Image.open(img_path))

In [None]:
# Display one labeled image (index 10); raises IndexError if fewer were collected
labeled_images[10]

In [None]:
# Display another labeled image (index 11)
labeled_images[11]

In [None]:
# Display the first sampled image that has no annotations
no_annotation_images[0]

In [None]:
# Display the second sampled image that has no annotations
no_annotation_images[1]

### Visualize some images with persons

In [None]:
# image_id of every annotation whose human_visible value is 1. There is one entry per
# annotation, so an image with several such annotations appears more than once.
person_mask = df_anno.human_visible.eq(1)
imgs_with_persons = df_anno.loc[person_mask, 'image_id']
len(imgs_with_persons)