In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil 
import glob
import json
import re
import cv2
from PIL import Image
from sklearn.metrics import mean_absolute_percentage_error
import seaborn as sns
%matplotlib inline

In [35]:
ROOT = '../'
IMG_DIR = os.path.join(ROOT, 'species_labelling', 'export_annotated_data')
CODE_DIR = os.path.join(ROOT, "code")

## Load Ground Truth

In [105]:
def load_ground_truth(foldername=os.path.join(ROOT,"data/") , filename="test_labels4-1.csv"): 

    ground_truth = pd.read_csv(foldername + filename)
    
    return ground_truth

In [106]:
ground_truth = load_ground_truth()
ground_truth.head()

Unnamed: 0.1,Unnamed: 0,TRIGGER_ID,CLASS_SPECIES,Total,CLASS_SPECIES_RESTATED
0,0,SSWI000000002741773,Porcupine,1,other
1,1,SSWI000000004032002,"Fox, Gray",1,foxgray_foxred
2,2,SSWI000000004068822,"Fox, Gray",1,foxgray_foxred
3,3,SSWI000000004248668,Raccoon,1,raccoon
4,4,SSWI000000004277676,"Fox, Gray",1,foxgray_foxred


In [109]:
def load_megadetector_output(foldername="results/JSON_txt_outputs/", filename='phase2_megadetector_classifications_yolosplits_4-1_YOLO.json'):#filename="phase2_megadetector_output_YOLO.json"): 
    """
    Pkg dependencies: os, glob, re, pandas
    Purpose: 
    Inputs: 
    Outputs: 
    """

    with open(os.path.join(ROOT,foldername, filename), 'r') as fin: 
        fobj = fin.read()
        megadetector = json.loads(fobj)

    event_list = []
    img_list = []
    detection_list = []

    for event, image_set in megadetector['phase2_classification_results'].items():
        for image in image_set:
            event_list.append(image['event_id'])
            img_list.append(image['img_id'])
            detection_list.append(image['detections'])

    megadetector_df = pd.DataFrame({'event_id': event_list,
                  'image_id':img_list, 
                  'detections':detection_list})

    def extract_yolo(list_of_detections):
        yolo_list = []

        for i in list_of_detections:
            yolo_list.append(i['bbox'])
        return yolo_list

    megadetector_df['yolo'] = megadetector_df['detections'].apply(lambda x: extract_yolo(x))
    megadetector_df['count'] = megadetector_df['yolo'].apply(lambda x: len(x))

    def extract_conf(list_of_detections):
        conf_list = []

        for i in list_of_detections:
            conf_list.append(i['conf'])
        return conf_list

    megadetector_df['all_conf'] = megadetector_df['detections'].apply(lambda x: extract_conf(x))
    megadetector_df['max_detection_conf'] = megadetector_df['all_conf'].apply(lambda x:  max(x) if len(x) > 0 else 0)
    megadetector_df['all_class_pred'] = megadetector_df['count'].apply(lambda x:[1]*x)
    
    
    return megadetector_df

  

In [110]:
megadetector = load_megadetector_output()

In [99]:
def split_and_convert(s):
    """
    Purpose: Utility function used in load_yolo_output function for bounding box.  
    """
    new = []
    out = s.split(',')
    for i in out: 
        new.append(round(float(i), 4))
    return new
    
def load_yolo_output(foldername="results/JSON_txt_outputs/", filename="phase2_yolo_yolosplits4_1.txt"):
    """
    Pkg dependencies: os, glob, re, pandas
    Purpose: 
    Inputs: 
    Outputs: 
    
    """
 


    # Load yolo model output file 
    with open(os.path.join(ROOT, foldername, filename), 'r') as fin: 
        yolov5 = fin.readlines()

    # Parse through file and pick out filename and bounding box
    filenames = []
    bbox = []
    for line_num, line in enumerate(yolov5):
        newline = line.split("\n")[0]
        semicolon_idxs = [m.start() for m in re.finditer(";", newline)]
        bbox_start, bbox_end = re.search(r"Bbox\[list]:", newline).start(), re.search(r"Bbox\[list]:", newline).end()

        for i, idx in list(zip(range(0,len(semicolon_idxs)), semicolon_idxs)): 
            # Filename
            if i == 0:
                filenames.append(newline[:idx].split("Filename:")[1].lstrip()[:-4])

        # Yolo Bounding box
        bbox_data = newline[bbox_end:].lstrip().split(';')[:-1]
        if len(bbox_data) == 0:
            bbox.append([])
        else: 
            subl = [split_and_convert(i) for i in bbox_data]
            bbox.append(subl)

    # Construct DataFrame
    yolov5 = pd.DataFrame([pd.Series(filenames), pd.Series(bbox)]).T
    yolov5.columns = ["image_id", "yolo_bbox"]
    yolov5.sort_values(by="image_id", inplace=True, ignore_index=True)
    yolov5['yolo_count'] = yolov5['yolo_bbox'].apply(lambda x: len(x))

    return yolov5
        


In [100]:
yolov5 = load_yolo_output()
yolov5.head()

Unnamed: 0,image_id,yolo_bbox,yolo_count
0,2008329_0A.,"[[0.5517, 0.3845, 0.0638, 0.1064]]",1
1,2008329_1B.,"[[0.5532, 0.3815, 0.0547, 0.0881]]",1
2,2008329_2C.,[],0
3,2009625_0A.,[],0
4,2009625_1B.,[],0


## Merge YOLO, Megadetector and ground truth dataframes

In [112]:
def merge_all(yolo_df, megadetector_df, ground_truth_df): 
    """
    Pkg dependencies: pandas 
    Purpose: 
    Inputs: YOLO pd.DataFrame, Megadetector pd.DataFrame, ground truth pd.DataFrame
    Outputs: Merged pd.DataFrame of YOLO, Megadetector and ground truth
    """
    
    # Merge all - The image id will repeat 3 times
    final_raw = megadetector_df.merge(yolo_df, left_on="image_id", right_on="image_id")
    final_raw.loc[:, 'event_id'] = final_raw['image_id'].apply(lambda x: x[:-1])
    final_raw = ground_truth_df.merge(final_raw, left_on="TRIGGER_ID", right_on="event_id")
    final_raw.rename(columns={'count':'md_count', 'Total':'ground_truth_count', 'yolo':'md_bbox', 
                              'all_class_pred':'md_all_class_pred', 'all_conf':'md_all_conf', 'max_detection_conf': 'md_max_detection_conf'}, inplace=True)
#     final_raw.drop(columns=['TRIGGER_ID'], inplace=True)
    final_raw.sort_values(by="image_id").reset_index(drop=True)
    final_raw = final_raw[['image_id','CLASS_SPECIES','CLASS_SPECIES_RESTATED','md_all_class_pred','md_all_conf','md_max_detection_conf', \
               'md_bbox','yolo_bbox','ground_truth_count', 'md_count','yolo_count']]
    
    # Merge megadetector to YOLO by "image_id"
    final = megadetector_df.merge(yolo_df, left_on="image_id", right_on="image_id")
    final.loc[:, 'image_id'] = final['image_id'].apply(lambda x: x[:-1])
    final.sort_values(by="image_id").reset_index(drop=True)
    
    
    # Group by imageid (there should be 3), take the max count across the imageid that compose the event
    gby_imageid = final.groupby(by='image_id').agg(['max'])
    counts_md = gby_imageid['count']
    counts_yolo = gby_imageid['yolo_count']
    
    # Merge ground truth to megadetector
    merged_md = ground_truth_df.merge(counts_md, left_on="TRIGGER_ID", right_on="image_id")
    merged_md.rename(columns={'max':'md_count'}, inplace=True)
    # Merge ground truth to yolo
    merged_yolo = ground_truth_df.merge(counts_yolo, left_on="TRIGGER_ID", right_on="image_id")
    merged_yolo.rename(columns={'max':'yolo_count'}, inplace=True)
    
    # Merge everything 
    merged_final = merged_yolo[['TRIGGER_ID', "CLASS_SPECIES", "Total", "CLASS_SPECIES_RESTATED", 'yolo_count']].merge(merged_md[['TRIGGER_ID','md_count']], left_on="TRIGGER_ID", right_on="TRIGGER_ID")
    merged_final = merged_final[['TRIGGER_ID', 'CLASS_SPECIES', "CLASS_SPECIES_RESTATED", "Total", "yolo_count", "md_count"]]
    merged_final.rename(columns={"Total":"ground_truth_count"}, inplace=True)
    merged_final.sort_values(by="TRIGGER_ID", inplace=True)
    
    # Differences across each of Choose 2 of 3
    merged_final['md_gt_diff'] = merged_final['md_count'] - merged_final['ground_truth_count']
    merged_final['yolo_gt_diff'] = merged_final['yolo_count'] - merged_final['ground_truth_count']
    merged_final['md_yolo_diff'] = merged_final['md_count'] - merged_final['yolo_count']
    
    return final_raw, merged_final

# Duplicates
# print(merged_final[merged_final.duplicated()].shape)

# ground_truth[ground_truth.duplicated()]
# ground_truth[ground_truth.TRIGGER_ID.duplicated(keep=False)].shape
# merged_final['Total'].sum()
# ground_truth[ground_truth.TRIGGER_ID.duplicated(keep=False)]

In [113]:
final_raw, merged_final = merge_all(yolov5, megadetector, ground_truth)

final_raw.head(40)
merged_final.head()

Unnamed: 0,TRIGGER_ID,CLASS_SPECIES,CLASS_SPECIES_RESTATED,ground_truth_count,yolo_count,md_count,md_gt_diff,yolo_gt_diff,md_yolo_diff
