# Inspect Island Conservation locations that contain rats

In [44]:
import json
import os
import pandas as pd

In [45]:
# Expand '~/' to the current user's home directory (trailing slash kept).
home_path = os.path.expanduser("~/")

In [46]:
# Load the Island Conservation metadata into `ic`; later cells read its
# 'categories', 'images' and 'annotations' entries.
ic_metadata_file = os.path.join(home_path, 'metadata/island_conservation.json')
# Pin the encoding: without it open() falls back to the platform's locale
# encoding, which is not guaranteed to be UTF-8.
with open(ic_metadata_file, 'r', encoding='utf-8') as f:
    ic = json.load(f)

## Step 1. Find locations with rats

In [None]:
# Lookup table: category name -> category id, built from ic['categories'].
category_name_to_id = {category['name']: category['id'] for category in ic['categories']}

In [34]:
# Derive image['location'] from the first two '/'-separated components of
# image['file_name'] and store it on each image record in place.
for image in ic['images']:
    leading_parts = image['file_name'].split('/')[:2]
    image['location'] = '/'.join(leading_parts)

In [43]:
# Lookup table: image id -> image record (each record carries a 'location' key
# once the location-parsing cell has run).
image_map = {image['id']: image for image in ic['images']}

def add_location(row):
    """Set row['location'] from the image record matching row['image_id'] and return the row.

    Kept for row-wise use; the cell below attaches locations column-wise instead.
    """
    row['location'] = image_map[row['image_id']]['location']
    return row

ic_annos = pd.DataFrame(ic['annotations'])
samples_w_rats = ic_annos.loc[ic_annos['category_id'] == category_name_to_id['rat']]
# Attach the location with a single column-level map rather than a row-wise
# DataFrame.apply, which rebuilds every row as a Series. Looking the id up with
# [] keeps the KeyError if an annotation references an unknown image.
samples_w_rats = samples_w_rats.assign(
    location=samples_w_rats['image_id'].map(lambda image_id: image_map[image_id]['location'])
)
locs_w_rats = samples_w_rats.location.unique()
print(f'Found {samples_w_rats.shape[0]} rat samples in {len(locs_w_rats)} locations:')
# Quote each location so the printed list can be pasted into Python source.
print(', '.join("'" + item + "'" for item in locs_w_rats))


Found 16338 rat samples in 50 locations:
'dominicanrepublic/camara116', 'dominicanrepublic/camara107', 'dominicanrepublic/camara106', 'dominicanrepublic/camara20', 'dominicanrepublic/camara115', 'dominicanrepublic/camara12', 'dominicanrepublic/camara32', 'dominicanrepublic/camara01', 'dominicanrepublic/camara108', 'dominicanrepublic/camara111', 'dominicanrepublic/camara117', 'dominicanrepublic/camara24', 'dominicanrepublic/camara30', 'ecuador1/ic1619', 'ecuador1/ic1616', 'chile/vaqueria', 'chile/frances02', 'puertorico/7a', 'puertorico/23', 'puertorico/2a', 'palau/cam02a', 'palau/cam09a', 'palau/cam10a', 'palau/cam13a', 'palau/cam14a', 'palau/cam01a', 'palau/cam04a', 'palau/cam06a', 'palau/cam07a', 'palau/cam08a', 'ecuador2/ic1605', 'ecuador2/ic1607', 'ecuador2/ic1618', 'micronesia/cam12', 'micronesia/cam13', 'micronesia/cam15', 'micronesia/cam03', 'micronesia/cam11', 'micronesia/cam10', 'micronesia/cam05', 'micronesia/cam08', 'micronesia/cam17', 'micronesia/cam14', 'micronesia/cam16',

Copy the list printed above into the `locations_of_interest` variable at the top of the `/invasive-animal-detection/utils/download_lila_subset.py` script.

## Step 2. Inspect those locations' distributions of other species
The goal is to figure out which other ("non-rat") classes we should include in the training set.

In [36]:
from tqdm import tqdm

all_ic_images_df = pd.DataFrame(ic['images'])

# find ALL annotations (including non-rats) that are from the same locations as
# the rat images included in training split

category_id_to_name = {category['id']: category['name'] for category in ic['categories']}

# A set gives a hash lookup per annotation; `in` on the array returned by
# .unique() would compare against every location each time.
rat_locations = set(locs_w_rats)

annos_from_rat_locations = []
for anno in tqdm(ic['annotations']):
    location = image_map[anno['image_id']]['location']
    if location in rat_locations:
        # Append an enriched copy instead of writing into the annotation itself,
        # so ic['annotations'] stays as loaded and this cell can be re-run safely.
        annos_from_rat_locations.append({
            **anno,
            'location': location,
            'category': category_id_to_name[anno['category_id']],
        })

# Show only a small sample; the full list is far too long to display.
annos_from_rat_locations[:5]

100%|██████████| 142341/142341 [00:00<00:00, 217185.18it/s]


[{'id': '6c7e25e8-df2b-11ea-909a-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara107_cam10701agosto2017_dominicanrepublic_cam10701agosto2017_20170218_174638_img_0139',
  'category_id': 4,
  'bbox': [0.96, 448.956, 361.92, 320.004],
  'location': 'dominicanrepublic/camara107',
  'category': 'raven'},
 {'id': '6c8be1a2-df2b-11ea-842c-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara111_cam11101agosto2017_dominicanrepublic_cam11101agosto2017_20170613_075438_img_0183',
  'category_id': 3,
  'bbox': [1390.08, 573.048, 526.08, 158.976],
  'location': 'dominicanrepublic/camara111',
  'category': 'iguana'},
 {'id': '6ca18ca4-df2b-11ea-9f2f-000d3a74c7de',
  'image_id': 'dominicanrepublic_camara20_cam2025septiembre2015_dominicanrepublic_cam2025septiembre2015_20150912_033417_img_0030',
  'category_id': 5,
  'bbox': [1443.0720000000001, 263.952, 476.928, 751.032],
  'location': 'dominicanrepublic/camara20',
  'category': 'cat'},
 {'id': '6ca86a68-df2b-11ea-84e4-000d3a74c7de',
  'image_i

In [37]:
def get_category_counts(annos_list, categories=None):
    """Summarise annotations per category.

    Parameters
    ----------
    annos_list : list of dict
        Annotation records; each needs a 'category_id' and a 'location' key.
    categories : list of dict, optional
        Category records with 'id' and 'name' keys. Defaults to the
        notebook-level ic['categories'].

    Returns
    -------
    pandas.DataFrame
        One row per category id with the columns 'category_id', 'counts'
        (number of annotations), 'category_name' and 'location_count'
        (number of distinct locations), sorted by 'counts' in descending order.
        An empty frame with those columns is returned for empty input.
    """
    if categories is None:
        categories = ic['categories']
    category_map = {category['id']: category['name'] for category in categories}

    annos_df = pd.DataFrame(annos_list)
    if annos_df.empty:
        # No 'category_id' column exists to group on; return the empty summary.
        return pd.DataFrame(columns=['category_id', 'counts', 'category_name', 'location_count'])

    by_category = annos_df.groupby('category_id')
    counts = by_category.size().reset_index(name='counts')
    counts['category_name'] = counts['category_id'].map(category_map)
    # One grouped pass instead of re-filtering the whole frame per category;
    # dropna=False counts a missing location as a distinct value, as unique() does.
    locations_per_category = by_category['location'].nunique(dropna=False)
    counts['location_count'] = counts['category_id'].map(locations_per_category)
    return counts.sort_values(by=['counts'], ascending=False)

In [38]:

# Per-category annotation counts across the locations where rats were found.
counts = get_category_counts(annos_list=annos_from_rat_locations)
counts

# Best contenders for inclusion in the training set, judging by the table:
# - iguanas (4146 annos at 16 locations)
# - cats (3709 annos at 17 locations)
# - pigs (1265 annos at 16 locations)
    

Unnamed: 0,category_id,counts,category_name,location_count
0,0,31808,empty,50
7,7,16338,rat,50
3,3,4146,iguana,16
8,8,4099,human,40
5,5,3709,cat,17
17,22,3199,rabbit,2
20,26,1591,shearwater,2
27,37,1265,pig,16
29,40,581,rooster,4
18,23,528,petrel_chick,3


Add all the classes (rats and non-rats) that you'd like to include to the `species_of_interest` variable at the top of the `/invasive-animal-detection/utils/download_lila_subset.py` script.