In [None]:
import json
import os

%matplotlib ipympl
import matplotlib
# matplotlib.use('GTK3Agg')
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.figure
import pandas as pd
import seaborn as sns
import numpy as np
from tqdm import tqdm
import random
from PIL import Image

from classification.train_utils import load_splits, plot_img_grid

random.seed(10)


### Load data

In [None]:
# Which run / data split to analyse.
model = 'resnet-18'
experiment = 'subsample-rats' #'baseline'
split = 'val'
runs_root = '/home/natty/invasive-animal-detection/runs/'
processed_data_root = '/home/natty/invasive-animal-detection/data/processed/'
raw_data_root = '/home/natty/invasive-animal-detection/data/raw/images'

# load results file produced by predict.py
results_path = os.path.join(runs_root, model, experiment, 'predictions', f'{split}_results.json')
with open(results_path, 'r') as f:
    res = json.load(f)

# load COCO for Cameratraps file (for split we're evaluating)
cct_path = os.path.join(processed_data_root, experiment, f'{split}_cct.json')
with open(cct_path, 'r') as f:
    # Read from the handle opened by the `with` statement so it is closed
    # on exit, rather than opening a second, never-closed handle.
    cct = json.load(f)

categories = cct['categories']
# Label index = position of the category in the `categories` list.
# NOTE(review): assumes this order matches the classifier's output indices -- confirm.
label_to_idx = { label['name']: idx for idx, label in enumerate(categories) }
idx_to_label = { idx: label['name'] for idx, label in enumerate(categories) }


In [None]:
# Display the category records loaded from the COCO Camera Traps file.
categories

### Functions

In [None]:
def plot_img_grid(paths, ncols=8, size=224):
    """Plot a grid of square images.

    NOTE: this definition shadows the ``plot_img_grid`` imported from
    ``classification.train_utils`` in the notebook's import cell.

    Args:
        paths: iterable of str, paths to image crops
        ncols: int, number of columns for output figure
        size: float, nominal edge length of each grid cell; it is divided by
            DPI (113) to obtain the size in inches of each row/column

    Returns: matplotlib Figure. When `paths` is empty the figure contains a
        single row of blank cells instead of raising.
    """
    DPI = 113
    paths = list(paths)  # accept any iterable; len() is needed below
    # Always keep at least one row: matplotlib rejects a grid with zero rows,
    # which made the function fail whenever a filter matched no images.
    nrows = max(1, int(np.ceil(len(paths) / ncols)))
    fig = matplotlib.figure.Figure(figsize=(ncols * size / DPI, nrows * size / DPI))
    axs = fig.subplots(nrows, ncols, squeeze=False)
    # axs.flat walks the grid row by row, i.e. image i lands at
    # (i // ncols, i % ncols). Wrapping the list (not the enumerate object)
    # lets tqdm know the total and show real progress.
    for ax, path in zip(axs.flat, tqdm(paths)):
        ax.imshow(mpimg.imread(path))
    # Hide axes on every cell, including unused trailing cells.
    for ax in axs.flat:
        ax.set_axis_off()
        ax.set_aspect('equal')
    fig.subplots_adjust(wspace=0, hspace=0)
    return fig

def print_top_two_labels(imgs, labels=None):
    """Print the highest and second-highest scoring label for each sample.

    Args:
        imgs: iterable of dicts, each with a 'scores' entry holding one score
            per class index
        labels: optional mapping from class index to label name; defaults to
            the notebook-level `idx_to_label`

    Prints one line per sample. Scores are ranked numerically and only
    formatted to three decimals for display; ranking the formatted strings
    would mis-order scores that round to the same text or differ in width.
    """
    if labels is None:
        labels = idx_to_label
    for sample_idx, sample in enumerate(imgs):
        # (class index, score) pairs, highest score first; ties keep index order
        ranked = sorted(enumerate(sample['scores']), key=lambda pair: pair[1], reverse=True)
        described = ['%s, %.3f' % (labels[idx], score) for idx, score in ranked[:2]]
        if not described:
            print(f'image {sample_idx}: no scores')
            continue
        # With a single class there is no runner-up to report.
        runner_up = described[1] if len(described) > 1 else 'n/a'
        print(f'image {sample_idx}: top score: {described[0]} -- runner up: {runner_up}')

In [None]:
# Summarise every field of the results file: name, number of entries, first entry.
for field, values in res.items():
    n_items = len(values)
    first_item = values[0]
    print(f'key: {field} - length: {n_items} - first item - {first_item}')

In [None]:
# Split the predictions into correctly classified samples ("positives")
# and misclassified samples ("negatives").
positives, negatives = [], []

for idx, pred in enumerate(res['pred_labels']):
    result_data = dict(
        pred=pred,
        actual=res['true_labels'][idx],
        logits=res['logits'][idx],
        scores=res['scores'][idx],
        filepath=res['filepaths'][idx],
    )
    is_correct = result_data['actual'] == result_data['pred']
    bucket = positives if is_correct else negatives
    bucket.append(result_data)

In [None]:
# "positives" are correct classifications, "negatives" are misclassifications.
n_correct = len(positives)
n_missed = len(negatives)
n_total = n_correct + n_missed
print(f'No. of positives (correct classifications): {n_correct}')
print(f'No. of negatives (missed classifications): {n_missed}')
# An empty results file would otherwise raise ZeroDivisionError here.
accuracy = n_correct / n_total if n_total else float('nan')
print(f'Overall accuracy: {accuracy}')

In [None]:
# Misclassified samples whose true label is rat and whose predicted label is rodent.
rat_label_idx = label_to_idx['rat']
rodent_label_idx = label_to_idx['rodent']
rodent_false_positives = [
    sample
    for sample in negatives
    if sample['pred'] == rodent_label_idx and sample['actual'] == rat_label_idx
]

print('Rats that were mislabeled as rodents')
paths = [sample['filepath'] for sample in rodent_false_positives]
plot_img_grid(paths, 8, 224)

# # Display images using the filepaths key
# def image_generator(entries): # use this for lazyloading (load images one at a time, instead of all at once, freeing up memory)
#     for idx, entry in enumerate(entries):
#         img = Image.open(entry['filepath'])
#         yield idx, img
# plt.figure(figsize=(35, 20))
# for idx, img in image_generator(rodent_false_positives):
#     plt.subplot(5, 5, idx + 1)
#     plt.imshow(img, aspect='auto')
#     plt.title(idx)
#     plt.axis('off')
# plt.tight_layout()
# plt.show()

In [None]:
# Correctly classified samples of one class, ordered from lowest to highest
# score for that class; the first 24 are plotted.
category = 'bird'
label_idx = label_to_idx[category]
bird_positives = sorted(
    (sample for sample in positives if sample['actual'] == label_idx),
    key=lambda sample: sample['scores'][label_idx],
)

print('Positive Birds w/ lowest bird scores')
paths = [sample['filepath'] for sample in bird_positives[:24]]
plot_img_grid(paths, 8, 224)

In [None]:
# Print the top label and the runner-up for the first 24 entries of
# bird_positives (sorted by ascending bird score where it is built).
print_top_two_labels(bird_positives[0:24])

In [None]:
# Correctly classified scrub jays, ordered from lowest to highest
# scrub jay score; the first 24 are plotted.
category = 'scrub jay'
label_idx = label_to_idx[category]
sj_positives = sorted(
    (sample for sample in positives if sample['actual'] == label_idx),
    key=lambda sample: sample['scores'][label_idx],
)

print('Positive scrub jays w/ lowest scrub jay scores')
paths = [sample['filepath'] for sample in sj_positives[:24]]
plot_img_grid(paths, 8, 224)

In [None]:
# Print the top label and the runner-up for the first 24 entries of
# sj_positives (sorted by ascending scrub jay score where it is built).
print_top_two_labels(sj_positives[0:24])

In [None]:
# show the MOST confident true-positives for a given class
category = 'scrub jay'
label_idx = label_to_idx[category]
sj_true_positives = list(filter(lambda x: x['actual'] == label_idx, positives))
sj_true_positives.sort(key=lambda x: x['scores'][label_idx], reverse=True)  # highest score first

# The list is ordered most-confident first, so the heading must say "highest".
print('Positive scrub jays w/ highest scrub jay scores')
paths = [fn['filepath'] for fn in sj_true_positives[0:24]]
plot_img_grid(paths, 8, 224)

In [None]:
# High-confidence rat false positives: misclassified samples that the model
# predicted as rat, ordered by descending rat score.
category = 'rat'
label_idx = label_to_idx[category]
rat_false_positives = sorted(
    [sample for sample in negatives if sample['pred'] == label_idx],
    key=lambda sample: sample['scores'][label_idx],
    reverse=True,  # most confident first
)

print('Rat false positives')
paths = [sample['filepath'] for sample in rat_false_positives[:100]]
plot_img_grid(paths, 8, 224)

In [None]:
# Rat false positives whose file path contains 'micronesia', one path per line.
rat_false_positives_micronesia = [
    rat_fp for rat_fp in rat_false_positives if 'micronesia' in rat_fp['filepath']
]

for filepath in (fp['filepath'] for fp in rat_false_positives_micronesia):
    print(filepath)

In [None]:
# Print the top label and the runner-up for the first 24 entries of
# rat_false_positives (sorted by descending rat score where it is built).
print_top_two_labels(rat_false_positives[0:24])

In [None]:
# Print most confident rat predictions that weren't rats

### Exploring suspicious lack of rodent/rat mislabeling

In [None]:
# Load the COCO Camera Traps files for the train and val splits.
# Context managers make sure both file handles are closed after parsing.
cct_path_train = os.path.join(processed_data_root, experiment, 'train_cct.json')
with open(cct_path_train, 'r') as f:
    cct_train = json.load(f)

cct_path_val = os.path.join(processed_data_root, experiment, 'val_cct.json')
with open(cct_path_val, 'r') as f:
    cct_val = json.load(f)

# Category name -> category id as stored in the train file (distinct from the
# positional label index used for the prediction results).
label_to_id = {cat['name']: cat['id'] for cat in cct_train['categories']}

In [None]:
# Index the image records of each split by their image id.
image_id_to_image_map_train = dict(
    (image['id'], image) for image in cct_train['images']
)
image_id_to_image_map_val = dict(
    (image['id'], image) for image in cct_val['images']
)

In [None]:

# Count the annotations carrying the rat category id in each split.
rat_id = label_to_id['rat']

rats_train = [anno for anno in cct_train['annotations'] if anno['category_id'] == rat_id]
print(f'No. of rats in training: {len(rats_train)}')

rats_val = [anno for anno in cct_val['annotations'] if anno['category_id'] == rat_id]
print(f'No. of rats in val: {len(rats_val)}')

## Rats from train

In [None]:
# Plot a random sample of up to 500 rat crops from the training split.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the number of available annotations.
crops_dir = os.path.join(processed_data_root, experiment, 'crops')
sampled_annos = random.sample(rats_train, min(500, len(rats_train)))
paths = [
    os.path.join(crops_dir, image_id_to_image_map_train[anno['image_id']]['file_name'])
    for anno in sampled_annos
]
plot_img_grid(paths, 8, 224)

## Rats from val

In [None]:
# Plot a random sample of up to 500 rat crops from the val split.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the number of available annotations.
crops_dir = os.path.join(processed_data_root, experiment, 'crops')
sampled_annos = random.sample(rats_val, min(500, len(rats_val)))
paths = [
    os.path.join(crops_dir, image_id_to_image_map_val[anno['image_id']]['file_name'])
    for anno in sampled_annos
]
plot_img_grid(paths, 8, 224)

In [None]:
# Inspect one rat annotation from the training split together with its image
# record and the path built for its original image.
# NOTE(review): 11000 is a hard-coded index; this raises IndexError when
# rats_train has fewer than 11001 entries.
anno_record = rats_train[11000]
print(anno_record)
image_record = image_id_to_image_map_train[anno_record['image_id']]
print(image_record)
# NOTE(review): raw_data_root is defined with a trailing 'images' component, so
# this join produces '.../images/images/...' -- confirm that is the real layout.
path = os.path.join(raw_data_root, 'images', image_record['orig_file_name'])
print(path)

In [None]:
# Training-split rat annotations whose image record has
# 'location': 'micronesia/cam06'.
rats_at_micronesia_cam06 = [
    r_anno
    for r_anno in rats_train
    if image_id_to_image_map_train[r_anno['image_id']]['location'] == 'micronesia/cam06'
]

In [None]:
# Number of training rat annotations located at 'micronesia/cam06'.
len(rats_at_micronesia_cam06)

# Rodents

In [None]:
# Count the annotations carrying the rodent category id in each split.
rodent_id = label_to_id['rodent']

rodents_train = [anno for anno in cct_train['annotations'] if anno['category_id'] == rodent_id]
print(f'No. of rodents in train: {len(rodents_train)}')

rodents_val = [anno for anno in cct_val['annotations'] if anno['category_id'] == rodent_id]
print(f'No. of rodents in val: {len(rodents_val)}')

## Rodents from training split

In [None]:
# Plot a random sample of up to 500 rodent crops from the training split.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the number of available annotations.
crops_dir = os.path.join(processed_data_root, experiment, 'crops')
sampled_annos = random.sample(rodents_train, min(500, len(rodents_train)))
paths = [
    os.path.join(crops_dir, image_id_to_image_map_train[anno['image_id']]['file_name'])
    for anno in sampled_annos
]
plot_img_grid(paths)

## Rodents from val

In [None]:
# Plot a random sample of up to 500 rodent crops from the val split.
# random.sample raises ValueError when asked for more items than exist, so the
# sample size is capped at the number of available annotations.
crops_dir = os.path.join(processed_data_root, experiment, 'crops')
sampled_annos = random.sample(rodents_val, min(500, len(rodents_val)))
paths = [
    os.path.join(crops_dir, image_id_to_image_map_val[anno['image_id']]['file_name'])
    for anno in sampled_annos
]
plot_img_grid(paths)

In [None]:
# Number of training annotations carrying the cat category id.
cat_id = label_to_id['cat']
cats_train = [anno for anno in cct_train['annotations'] if anno['category_id'] == cat_id]
len(cats_train)

In [None]:
# Plot the first 500 cat crops from the training split.
# cats_train is built from cct_train, so image records are looked up in the
# train map (a plain `image_id_to_image_map` is not defined in this notebook).
# NOTE(review): the crop directory now includes `experiment`, matching the
# other crop-grid cells -- confirm the cat crops live there too.
crops_dir = os.path.join(processed_data_root, experiment, 'crops')
paths = [
    os.path.join(crops_dir, image_id_to_image_map_train[anno['image_id']]['file_name'])
    for anno in cats_train[:500]
]
plot_img_grid(paths, 8, 224)

In [None]:
# Distinct 'location' values of the images that carry rat annotations in the
# training split, in order of first appearance.
# rats_train comes from cct_train, so image records are looked up in the train
# map (a plain `image_id_to_image_map` is not defined in this notebook).
# dict.fromkeys de-duplicates while keeping insertion order, avoiding a linear
# list scan for every annotation.
rat_locs_train = list(dict.fromkeys(
    image_id_to_image_map_train[rat['image_id']]['location'] for rat in rats_train
))

In [None]:
# Display the distinct locations with rat annotations in the training split.
rat_locs_train

In [None]:
# Number of distinct locations with rat annotations in the training split.
len(rat_locs_train)