In [2]:
#pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt

In [1]:
import cv2
import torch
from PIL import Image
import os
import pandas as pd
import numpy as np
from datetime import datetime

import argparse
import sys
import time
from pathlib import Path

import cv2
import json

In [2]:
pd.set_option('display.max_columns', None)

In [3]:
# Directory holding the images that the YOLO and blur steps read.
# NOTE(review): hard-coded absolute local path -- point it at your own copy of
# the data (or derive it from a configurable base directory) before running.
img_directory = '/Users/sleung2/Documents/MIDS Program/Capstone_local/snapshot_wisconsin/all/yolo_splits4.2/test/images/'


## Labels

In [66]:
def _make_label_table(species_names):
    """Build a class-code lookup table from a list of species names.

    The names are sorted alphabetically and numbered 0..n-1 in that order.
    The returned frame has the columns ``label`` and ``species`` and keeps
    each name's position in ``species_names`` as its index.
    """
    table = pd.DataFrame({'species': species_names}).sort_values('species')
    table.insert(0, 'label', range(len(table)))
    return table


# Species shared by both stage 2 models.
_STAGE_2_SPECIES = [
    'foxgray_foxred',
    'cottontail_snowshoehare',
    'raccoon',
    'opossum',
    'turkey',
    'bear',
    'elk',
    'deer',
    'coyote',
    'wolf',
]

#Stage 1
stage_1_labels = _make_label_table(['animal', 'blank'])

#Stage 2 Yolo
stage_2_yolo_labels = _make_label_table(_STAGE_2_SPECIES)

#Stage 2 Effnet (Add blank)
stage_2_effnet_labels = _make_label_table(_STAGE_2_SPECIES + ['blank'])


## Helper Functions

In [5]:
def codes_to_labels(full_results_df, labels):
    """Translate numeric class codes into species names.

    Adds a ``species_name`` column to ``full_results_df`` (the frame passed in
    is modified) by looking each ``class`` value up in ``labels``, a frame
    with ``label`` and ``species`` columns.  Returns a copy of the result in
    which every missing value, including codes with no match, is replaced by
    an empty string.
    """
    species_by_code = labels.set_index('label')['species'].to_dict()
    full_results_df['species_name'] = full_results_df['class'].map(species_by_code)
    return full_results_df.fillna('')

def img_name_to_event(img):
    """Derive the event (image group) id from an image file name.

    If the name contains an underscore, the event is everything before the
    first underscore.  Otherwise it is the text before the first dot with its
    final character removed.
    """
    prefix, underscore, _ = img.partition('_')
    if underscore:
        return prefix
    return img.split('.')[0][:-1]

## Yolo Inference

In [2]:
def yolo_inference(img_directory, weights_path, max_images=99, img_size=329):
    """Run a custom YOLOv5 model over the images in a directory.

    Parameters
    ----------
    img_directory : str
        Directory containing the images.
    weights_path : str
        Path to the YOLOv5 weights, loaded through ``torch.hub``.
    max_images : int or None, default 99
        Maximum number of images to score (the previous hard-coded limit).
        ``None`` scores every readable image.
    img_size : int, default 329
        Inference size passed to the model.

    Returns
    -------
    pandas.DataFrame
        One row per detection with the columns ``xmin, ymin, xmax, ymax,
        conf, class, image_name``.  Images without any detection get a single
        row whose detection fields are empty strings.  An empty frame with
        these columns is returned when no image could be read.
    """
    result_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'conf', 'class', 'image_name']

    # Model
    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights_path)

    # Images.  Sorted so the selected subset is the same on every run
    # (os.listdir returns entries in arbitrary order).
    imgs = []
    img_names = []
    for img_name in sorted(os.listdir(img_directory)):
        if max_images is not None and len(imgs) >= max_images:
            break
        img = cv2.imread(os.path.join(img_directory, img_name))
        if img is None:
            # cv2.imread returns None for anything it cannot decode
            # (e.g. .DS_Store); skip it instead of failing on the slice below.
            continue
        imgs.append(img[:, :, ::-1])  # reverse the channel axis (BGR -> RGB)
        img_names.append(img_name)

    print("Running inference on {} images".format(len(img_names)))
    if not imgs:
        return pd.DataFrame(columns=result_columns)

    # Inference
    results = model(imgs, size=img_size)  # includes NMS

    # Combine the per-image detections into a single frame with one concat.
    per_image = []
    for tensor, image_name in zip(results.xyxy, img_names):
        # Move to host memory first so this also works when the model ran on a GPU.
        detections = pd.DataFrame(tensor.detach().cpu().numpy(),
                                  columns=result_columns[:-1])
        detections['image_name'] = image_name
        per_image.append(detections)
    full_results_df = pd.concat(per_image)

    # Blank images do not produce any results, so add rows holding just the
    # image names for them.
    detected = set(full_results_df['image_name'])
    blank_imgs = [img for img in img_names if img not in detected]
    blank_img_df = pd.DataFrame(columns=full_results_df.columns)
    blank_img_df['image_name'] = blank_imgs
    blank_img_df = blank_img_df.fillna('')

    full_results_df = pd.concat([full_results_df, blank_img_df])

    return full_results_df

def yolo_boxes_to_df(full_results_df, img_size=(329, 329)):
    """Add an ``image_bbox`` column with boxes in relative x,y,w,h form.

    Parameters
    ----------
    full_results_df : pandas.DataFrame
        Detections with ``xmin``, ``xmax``, ``ymin`` and ``ymax`` columns.
        The frame is modified in place and also returned.
    img_size : tuple, default (329, 329)
        ``(width, height)`` used to normalise the coordinates.
        NOTE(review): this assumes the box coordinates are expressed in a
        329x329 frame -- confirm against the coordinate space of the YOLOv5
        results for these images.

    Returns
    -------
    pandas.DataFrame
        ``full_results_df`` with ``image_bbox`` holding ``'x,y,w,h'`` strings
        (box centre and size as fractions of ``img_size``), or ``''`` for rows
        whose coordinates cannot be converted, such as blank-image rows.
    """
    def convert_yolo_bbox(size, box):
        '''Convert result bbox format from xmin,xmax,ymin,ymax absolute values to
        x,y,w,h relative values'''
        try:
            dw = 1./size[0]
            dh = 1./size[1]
            x = (box[0] + box[1])/2.0
            y = (box[2] + box[3])/2.0
            w = box[1] - box[0]
            h = box[3] - box[2]
            x = x*dw
            w = w*dw
            y = y*dh
            h = h*dh
        except Exception:
            # Rows without a detection carry '' placeholders, which cannot be
            # used in arithmetic; they get an empty box string.
            return ''
        return '{},{},{},{}'.format(x, y, w, h)

    # Built from the columns directly so an empty frame also works.
    full_results_df['image_bbox'] = [
        convert_yolo_bbox(img_size, [xmin, xmax, ymin, ymax])
        for xmin, xmax, ymin, ymax in zip(full_results_df['xmin'],
                                          full_results_df['xmax'],
                                          full_results_df['ymin'],
                                          full_results_df['ymax'])
    ]

    return full_results_df


def yolo_spec_conf_bbox_formatting(full_results_df):
    """Reshape per-detection YOLO results into one row per event.

    Parameters
    ----------
    full_results_df : pandas.DataFrame
        One row per detection with the columns ``image_name``,
        ``species_name``, ``conf`` and ``image_bbox`` (strings, except
        ``conf`` which is passed through ``str``).

    Returns
    -------
    pandas.DataFrame
        One row per event with ``image_group_id`` followed by, for image
        slots 1-3, ``image_id_N`` (last character of the file stem),
        ``image_id_N_species_name`` and ``image_id_N_conf`` (comma-separated,
        one entry per detection) and ``image_id_N_bbox``
        (semicolon-separated).  Slots an event does not fill hold empty
        strings; images beyond the third of an event are dropped.  The index
        starts at 1, as before.

    Notes
    -----
    The previous version flushed the last event incorrectly after the loop.
    It appended the last image to slot 1 a second time, which made the column
    lists unequal in length whenever the last event had two or three images,
    and it copied a lone last image into slots 2 and 3 instead of padding
    them.
    """
    # Stable sort so the detections of one image keep their incoming order.
    ordered = full_results_df.sort_values(by='image_name', kind='mergesort')

    # 1) Collapse consecutive detection rows into one record per image.
    images = []
    for _, detection in ordered.iterrows():
        image_name = detection['image_name']
        if not images or images[-1]['name'] != image_name:
            images.append({'name': image_name, 'species': '', 'conf': '', 'bbox': ''})
        record = images[-1]
        # A separator is needed only once the image has registered a species.
        has_species = len(record['species']) > 0
        record['conf'] += (',' if has_species else '') + str(detection['conf'])
        record['bbox'] += (';' if has_species else '') + detection['image_bbox']
        record['species'] += (',' if has_species else '') + detection['species_name']

    # 2) Group consecutive images that belong to the same event.
    events = []
    for record in images:
        event_name = img_name_to_event(record['name'])
        if not events or events[-1][0] != event_name:
            events.append((event_name, []))
        events[-1][1].append(record)

    # 3) Lay every event out over three image slots.
    slots = (1, 2, 3)
    fields = ('', '_species_name', '_conf', '_bbox')
    columns = {'image_group_id': []}
    for suffix in fields:
        for slot in slots:
            columns['image_id_{}{}'.format(slot, suffix)] = []

    for event_name, records in events:
        columns['image_group_id'].append(event_name)
        for slot in slots:
            if slot <= len(records):
                record = records[slot - 1]
                values = (record['name'].split('.')[0][-1:],
                          record['species'], record['conf'], record['bbox'])
            else:
                values = ('', '', '', '')
            for suffix, value in zip(fields, values):
                columns['image_id_{}{}'.format(slot, suffix)].append(value)

    formatted_yolo = pd.DataFrame(columns)
    # The earlier implementation dropped a leading placeholder row, which left
    # the index starting at 1; keep that so downstream output is unchanged.
    formatted_yolo.index = pd.RangeIndex(1, len(formatted_yolo) + 1)

    return formatted_yolo

def yolo_count_blank_detect_formatting(formatted_yolo, model_id):
    """Add count, blank, load-date and model-id columns to the event frame.

    Parameters
    ----------
    formatted_yolo : pandas.DataFrame
        Frame with ``image_id_N_species_name`` columns for N in 1..3 holding
        comma-separated species strings.  Modified in place.
    model_id :
        Value stored in the ``model_id`` column of every row.

    Returns
    -------
    pandas.DataFrame
        ``formatted_yolo`` with, for each image slot, ``image_id_N_count``
        (number of comma-separated species, 0 for an empty string) and
        ``image_id_N_blank`` (True when the species string is empty), plus
        ``load_date`` (today's date as ``mm/dd/YYYY``) and ``model_id``.
    """
    def count_species(species_string):
        species_list = species_string.split(',')
        if species_list[0] == '':
            return 0
        return len(species_list)

    for image in range(1, 4):
        species_column = formatted_yolo['image_id_{}_species_name'.format(image)]
        formatted_yolo['image_id_{}_count'.format(image)] = species_column.apply(count_species)

    for image in range(1, 4):
        species_column = formatted_yolo['image_id_{}_species_name'.format(image)]
        formatted_yolo['image_id_{}_blank'.format(image)] = species_column.apply(
            lambda species: species == '')

    #Load date
    # The column used to be written as 'load_date ' (trailing space), which
    # does not match the 'load_date' column of the combined output file.
    formatted_yolo['load_date'] = datetime.now().strftime("%m/%d/%Y")

    #Model ID
    formatted_yolo['model_id'] = model_id

    return formatted_yolo

    

## Blur

In [3]:
def variance_of_laplacian(image):
    """Return the focus measure of ``image``: the variance of its Laplacian.

    The Laplacian is computed with ``cv2.CV_64F`` as the output depth.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    return laplacian.var()

# Kept only so existing references to ``d`` keep resolving.  blur_processing
# no longer appends to it: the shared dict grew on every call, so a second run
# produced duplicate blur rows and duplicated event rows in the merge.
d = {'img_id':[], 'blurry':[], 'blurry_index':[]}


def blur_processing(img_directory, formatted_yolo, run_blur, blur_threshold=100):
    """Add the ``image_id_N_detectable`` columns to the event frame.

    Parameters
    ----------
    img_directory : str
        Directory containing the images (read only when ``run_blur`` is true).
    formatted_yolo : pandas.DataFrame
        Event frame with ``image_group_id`` and ``image_id_1..3`` columns.
    run_blur : bool
        When true, every readable image in ``img_directory`` is scored with
        the variance of its Laplacian.  When false, all three
        ``image_id_N_detectable`` columns are set to False on the frame that
        was passed in.
    blur_threshold : float, default 100
        Images whose focus measure is below this value count as blurry.

    Returns
    -------
    pandas.DataFrame
        The event frame with ``image_id_N_detectable`` for N in 1..3.  With
        ``run_blur`` the value is the image's blurry flag, or NaN when no
        scored image matches the slot.
        NOTE(review): "detectable" therefore holds True for *blurry* images;
        confirm that this is the intended meaning of the column.
    """
    if not run_blur:
        for i in range(1, 4):
            formatted_yolo['image_id_{}_detectable'.format(i)] = False
        return formatted_yolo

    events = []
    appendices = []
    blurry_flags = []
    # loop over the input images
    for img in os.listdir(img_directory):
        image = cv2.imread(os.path.join(img_directory, img))
        if image is None:
            # Not a decodable image (cv2.imread returns None); nothing to score.
            continue

        # convert to grayscale and compute the focus measure using the
        # Variance of Laplacian method
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        focus_measure = variance_of_laplacian(gray)

        events.append(img_name_to_event(img))
        appendices.append(img.split('.')[0][-1:])
        # a focus measure below the threshold marks the image as blurry
        blurry_flags.append(bool(focus_measure < blur_threshold))

    blur_df = pd.DataFrame({'image_group_id': events,
                            'img_appendix': appendices,
                            'blurry': blurry_flags})

    for i in range(1, 4):
        formatted_yolo = pd.merge(formatted_yolo, blur_df,
                                  how='left',
                                  left_on=['image_group_id', 'image_id_{}'.format(i)],
                                  right_on=['image_group_id', 'img_appendix'])

        formatted_yolo['image_id_{}_detectable'.format(i)] = formatted_yolo['blurry']
        formatted_yolo = formatted_yolo.drop(columns=['img_appendix', 'blurry'])

    return formatted_yolo



In [53]:
# YOLOv5 weight files handed to yolo_inference_and_formatting below.
# Both are relative paths, so they resolve against the current working directory.
yolo_weights_species_path = 'yolov5s_best_serengeti_splits4.pt'
yolo_weights_blank_path = 'yolov5l_best_blank.pt'

def yolo_inference_and_formatting(img_directory, weight, labels, model_id, run_blur = True):
    """Run the whole YOLO pipeline and return the per-event frame.

    Steps, in order: inference on ``img_directory`` with the weights at
    ``weight``, bounding-box conversion, class-code to species-name mapping
    with ``labels``, reshaping to one row per event, count/blank/model-id
    columns for ``model_id``, and finally the blur step (skipped inside
    ``blur_processing`` when ``run_blur`` is false).
    """
    detections = codes_to_labels(
        yolo_boxes_to_df(yolo_inference(img_directory, weight)),
        labels,
    )
    per_event = yolo_count_blank_detect_formatting(
        yolo_spec_conf_bbox_formatting(detections),
        model_id,
    )
    return blur_processing(img_directory, per_event, run_blur)

# Stage 1: animal-vs-blank model, stored as model_id 1 (blur scoring skipped).
formatted_yolo_blank = yolo_inference_and_formatting(img_directory, yolo_weights_blank_path, stage_1_labels, 1,
                                                    run_blur = False)
# Stage 2: species model, stored as model_id 3.  The label table defined above
# for this model is `stage_2_yolo_labels`; `stage_2_labels` is never defined,
# so the previous call raised NameError on a fresh kernel.
formatted_yolo_species = yolo_inference_and_formatting(img_directory, yolo_weights_species_path, stage_2_yolo_labels, 3)

Using cache found in /Users/sleung2/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-11-27 torch 1.10.0 CPU

Fusing layers... 
Model Summary: 392 layers, 46600566 parameters, 0 gradients
Adding AutoShape... 


Running inference on 99 images


Using cache found in /Users/sleung2/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-11-27 torch 1.10.0 CPU

Fusing layers... 
Model Summary: 224 layers, 7078183 parameters, 0 gradients
Adding AutoShape... 


Running inference on 99 images




## EfficientNet

In [68]:
def get_speciesname_from_id(id, labels):
    """Return the species name stored at position ``id`` of a label table.

    Parameters
    ----------
    id : int or str
        Class index; converted with ``int``.
    labels : pandas.DataFrame
        Label table whose ``species`` column is read in row order.

    Returns
    -------
    str
        The species at that position, or ``'other'`` when the index is
        negative or past the end of the table.  The upper bound used to be
        hard-coded to 10, which raised IndexError for tables with fewer than
        eleven species.
    """
    speciesList = list(labels['species'])
    idx = int(id)
    if idx < 0 or idx >= len(speciesList):
        return 'other'
    return speciesList[idx]

In [73]:
get_speciesname_from_id(1, stage_2_effnet_labels)

'blank'

In [29]:
def load_effnet_json(output_json, phase = 'phase2', labels = None):
    """Load EfficientNet classification results from a JSON file.

    Parameters
    ----------
    output_json : str
        Path to a JSON file whose ``'<phase>_classification_results'`` entry
        is a list of records, each with an ``id`` and a ``conf_dict`` mapping
        class ids to lists whose first element is the confidence.
    phase : str, default 'phase2'
        Selects the results key and, when ``labels`` is not given, the label
        table: ``stage_1_labels`` for ``'phase1'``, otherwise
        ``stage_2_effnet_labels``.
    labels : pandas.DataFrame, optional
        Label table used to turn class ids into species names.

    Returns
    -------
    dict
        ``{id: {'Class': 'name,name,...', 'Conf': 'conf,conf,...'}}`` with the
        entries in the order they appear in ``conf_dict``.
    """
    if labels is None:
        # get_speciesname_from_id needs a label table; it used to be handed the
        # phase string, which fails on labels['species'].
        labels = stage_1_labels if phase == 'phase1' else stage_2_effnet_labels

    with open(output_json) as json_file:
        data = json.load(json_file)

    effnet_json = {}
    for record in data['{}_classification_results'.format(phase)]:
        class_names = []
        confidences = []
        for class_id, conf_values in record['conf_dict'].items():
            class_names.append(get_speciesname_from_id(class_id, labels))
            confidences.append(str(conf_values[0]))

        effnet_json[record['id']] = {'Class': ','.join(class_names),
                                     'Conf': ','.join(confidences)}

    return effnet_json


In [53]:
def format_effnet(effnet_dict, model_id):
    '''Convert the EfficientNet results dict into a per-event pandas df.

    ``effnet_dict`` maps an image name to ``{'Class': ..., 'Conf': ...}``
    (comma-separated strings).  Images are sorted by name and grouped by
    event; the first three images of each event fill the slots
    ``image_id_1..3`` and unused slots hold empty strings.

    With ``model_id == 2`` only the first class and confidence of each image
    are kept and a slot is blank when its species is ``'blank'``.  For any
    other ``model_id`` the full strings are kept and a slot is blank when its
    species string is empty.  Count columns are 0, bbox columns are ``''``
    and detectable columns are False for every row.
    '''
    image_names = list(effnet_dict.keys())
    per_image = pd.DataFrame({
        'event': [img_name_to_event(name) for name in image_names],
        'image': image_names,
        'class': [effnet_dict[name]['Class'] for name in image_names],
        'conf': [effnet_dict[name]['Conf'] for name in image_names],
    }).sort_values(by='image')

    slots = (1, 2, 3)
    fields = ('', '_species_name', '_conf')
    columns = {'image_group_id': []}
    for suffix in fields:
        for slot in slots:
            columns['image_id_{}{}'.format(slot, suffix)] = []

    top_class_only = model_id == 2
    empty_slot = ('', '', '')

    for event in per_image['event'].unique():
        columns['image_group_id'].append(event)
        members = per_image[per_image['event'] == event]

        entries = []
        for image_name, classes, confs in zip(members['image'], members['class'], members['conf']):
            appendix = image_name.split('.')[0][-1]
            if top_class_only:
                entries.append((appendix, classes.split(',')[0], confs.split(',')[0]))
            else:
                entries.append((appendix, classes, confs))

        # Keep at most three images per event and pad the remaining slots.
        entries = entries[:3] + [empty_slot] * (3 - len(entries))
        for slot, values in zip(slots, entries):
            for suffix, value in zip(fields, values):
                columns['image_id_{}{}'.format(slot, suffix)].append(value)

    formatted_effnet = pd.DataFrame(columns)

    for slot in slots:
        formatted_effnet['image_id_{}_count'.format(slot)] = 0

    for slot in slots:
        formatted_effnet['image_id_{}_bbox'.format(slot)] = ''

    blank_marker = 'blank' if top_class_only else ''
    for slot in slots:
        species_column = formatted_effnet['image_id_{}_species_name'.format(slot)]
        formatted_effnet['image_id_{}_blank'.format(slot)] = species_column.apply(
            lambda species: species == blank_marker)

    for slot in slots:
        formatted_effnet['image_id_{}_detectable'.format(slot)] = False

    formatted_effnet['model_id'] = model_id

    return formatted_effnet

In [54]:
# Stage 2 EfficientNet: read the saved 'phase2' results and reshape them to one
# row per event.  Any model_id other than 2 makes format_effnet keep the full
# comma-separated class / confidence strings of each image.
stage2_output_json = '../results/JSON_txt_outputs/phase2_efficientnetb5_yolo_splits4-1_classifications_basePlusblanks.json'

stage2_effnet_dict = load_effnet_json(stage2_output_json, 'phase2')
stage2_formatted_effnet = format_effnet(stage2_effnet_dict, 4)


In [55]:
# Stage 1 EfficientNet: read the saved 'phase1' results and reshape them to one
# row per event.  model_id 2 makes format_effnet keep only the first class and
# confidence of each image and mark a slot blank when that class is 'blank'.
stage1_output_json = '../results/JSON_txt_outputs/phase1_efficientnetb0_classifications_yolosplits_4-1.json'

stage1_effnet_dict = load_effnet_json(stage1_output_json, 'phase1')
stage1_formatted_effnet = format_effnet(stage1_effnet_dict, 2)

In [56]:
stage1_formatted_effnet

Unnamed: 0,image_group_id,image_id_1,image_id_2,image_id_3,image_id_1_species_name,image_id_2_species_name,image_id_3_species_name,image_id_1_conf,image_id_2_conf,image_id_3_conf,image_id_1_count,image_id_2_count,image_id_3_count,image_id_1_bbox,image_id_2_bbox,image_id_3_bbox,image_id_1_blank,image_id_2_blank,image_id_3_blank,image_id_1_detectable,image_id_2_detectable,image_id_3_detectable,model_id
0,2008329,A,B,C,blank,blank,blank,0.9870110750198364,0.9590360522270203,0.9834611415863037,0,0,0,,,,True,True,True,False,False,False,2
1,2009625,A,B,C,animal,animal,animal,0.9721313714981079,0.9863938689231873,0.983247697353363,0,0,0,,,,False,False,False,False,False,False,2
2,2010025,A,B,C,blank,blank,blank,0.9983174800872803,0.9915269017219543,0.996371865272522,0,0,0,,,,True,True,True,False,False,False,2
3,2010778,A,B,C,blank,blank,blank,0.9938222169876099,0.9642019271850586,0.9773263335227966,0,0,0,,,,True,True,True,False,False,False,2
4,2010824,A,B,C,blank,animal,blank,0.9365163445472717,0.5628920793533325,0.523880660533905,0,0,0,,,,True,False,True,False,False,False,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1649,SSWI000000023457314,A,B,C,animal,animal,animal,0.9852554798126221,0.8180868625640869,0.8387702703475952,0,0,0,,,,False,False,False,False,False,False,2
1650,SSWI000000023457319,A,B,C,animal,animal,animal,0.9991550445556641,0.9956256151199341,0.9907820820808411,0,0,0,,,,False,False,False,False,False,False,2
1651,SSWI000000023494506,A,B,C,animal,animal,animal,0.9998522996902466,0.9999915361404419,0.668764591217041,0,0,0,,,,False,False,False,False,False,False,2
1652,SSWI000000023514111,A,B,C,animal,animal,animal,0.9979859590530396,0.9920384883880615,0.9798725843429565,0,0,0,,,,False,False,False,False,False,False,2


## Megadetector

In [35]:
def load_megad_json(output_json):
    """Load MegaDetector results from a JSON file.

    Reads the ``'phase2_classification_results'`` entry, a mapping whose
    values are lists of records carrying an ``img_id`` and a list of
    ``detections`` (each with a ``bbox`` list and a ``conf``).

    Returns a dict keyed by ``img_id`` with:
      * ``'Count'``  - number of detections,
      * ``'Coords'`` - boxes joined with ';', the values of a box joined with ',',
      * ``'Conf'``   - confidences joined with ';'.
    Images without detections get empty strings for ``Coords`` and ``Conf``.
    """
    with open(output_json) as json_file:
        payload = json.load(json_file)

    megad_json = {}
    for image_records in payload['phase2_classification_results'].values():
        for record in image_records:
            detections = record['detections']
            megad_json[record['img_id']] = {
                'Count': len(detections),
                'Coords': ';'.join(
                    ','.join(str(coordinate) for coordinate in detection['bbox'])
                    for detection in detections
                ),
                'Conf': ';'.join(str(detection['conf']) for detection in detections),
            }

    return megad_json

In [51]:
stage2_megad_dict

{'SSWI000000019807656A.jpg': {'Count': 10,
  'Coords': '0.5436,0.4957,0.8986,0.9914;0.8767,0.3685,0.2434,0.4795;0.9324,0.3239,0.1336,0.3718;0.8736,0.4589,0.2172,0.3029;0.8221,0.4584,0.1445,0.3046;0.7076,0.4479,0.5762,0.8533;0.9195,0.327,0.1453,0.5381;0.8333,0.3553,0.3219,0.596;0.7917,0.5032,0.06895,0.2138;0.8215,0.4836,0.08764,0.2589',
  'Conf': '0.999;0.627;0.467;0.334;0.222;0.18;0.163;0.128;0.126;0.118'},
 'SSWI000000019807656B.jpg': {'Count': 4,
  'Coords': '0.8367,0.527,0.3227,0.8099;0.2321,0.2798,0.156,0.09842;0.1977,0.2784,0.1143,0.09395;0.2056,0.2801,0.07686,0.08366',
  'Conf': '0.999;0.833;0.573;0.156'},
 'SSWI000000019807656C.jpg': {'Count': 5,
  'Coords': '0.8674,0.3758,0.2631,0.4973;0.2041,0.2784,0.09341,0.09285;0.248,0.2791,0.2051,0.1073;0.2302,0.2815,0.1332,0.09;0.2581,0.2862,0.1365,0.07968',
  'Conf': '0.999;0.857;0.517;0.345;0.17'},
 'SSWI000000020162423B.jpg': {'Count': 1,
  'Coords': '0.9051,0.7641,0.1866,0.4715',
  'Conf': '0.999'},
 'SSWI000000020162423A.jpg': {'Coun

In [58]:
def format_megad(megad_dict, model_id):
    '''Convert the MegaDetector results dict into a per-event pandas df.

    Parameters
    ----------
    megad_dict : dict
        Maps an image name to ``{'Count': ..., 'Coords': ..., 'Conf': ...}``.
    model_id :
        Value stored in the ``model_id`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per event.  Images are sorted by name; the first three images
        of an event fill the slots ``image_id_1..3`` with their count, conf
        and bbox values, and unused slots hold empty strings.
        ``image_id_N_species_name`` is always ``''``, ``image_id_N_blank`` is
        True when the slot's count equals 0, and ``image_id_N_detectable`` is
        always False.

    The species columns used to be named ``image_id_N_species``, which did not
    line up with the ``image_id_N_species_name`` columns produced by the other
    formatters.
    '''
    image_names = list(megad_dict.keys())
    per_image = pd.DataFrame({
        'event': [img_name_to_event(name) for name in image_names],
        'image': image_names,
        'count': [megad_dict[name]['Count'] for name in image_names],
        'bbox': [megad_dict[name]['Coords'] for name in image_names],
        'conf': [megad_dict[name]['Conf'] for name in image_names],
    }).sort_values(by='image')

    slots = (1, 2, 3)
    fields = ('', '_count', '_conf', '_bbox')
    columns = {'image_group_id': []}
    for suffix in fields:
        for slot in slots:
            columns['image_id_{}{}'.format(slot, suffix)] = []

    empty_slot = ('', '', '', '')

    for event in per_image['event'].unique():
        columns['image_group_id'].append(event)
        members = per_image[per_image['event'] == event]

        entries = [
            (image_name.split('.')[0][-1:], count, conf, bbox)
            for image_name, count, conf, bbox in zip(members['image'], members['count'],
                                                     members['conf'], members['bbox'])
        ]
        # Keep at most three images per event and pad the remaining slots.
        entries = entries[:3] + [empty_slot] * (3 - len(entries))
        for slot, values in zip(slots, entries):
            for suffix, value in zip(fields, values):
                columns['image_id_{}{}'.format(slot, suffix)].append(value)

    formatted_megad = pd.DataFrame(columns)

    # MegaDetector only localises; it does not name a species.
    for slot in slots:
        formatted_megad['image_id_{}_species_name'.format(slot)] = ''

    for slot in slots:
        count_column = formatted_megad['image_id_{}_count'.format(slot)]
        # Padded slots hold '' rather than 0, so they are not flagged blank.
        formatted_megad['image_id_{}_blank'.format(slot)] = count_column.apply(
            lambda count: count == 0)

    for slot in slots:
        formatted_megad['image_id_{}_detectable'.format(slot)] = False

    formatted_megad['model_id'] = model_id

    return formatted_megad

In [59]:
# MegaDetector: read the saved results and reshape them to one row per event,
# stored as model_id 5.
stage2_megad_output_json = '../results/JSON_txt_outputs/phase2_megadetector_classifications_yolosplits_4-1_YOLO.json'

stage2_megad_dict = load_megad_json(stage2_megad_output_json)
formatted_megad = format_megad(stage2_megad_dict, 5)



In [55]:
all_models_df = pd.concat([formatted_yolo_blank, formatted_yolo_species])

In [34]:
stage2_formatted_effnet

Unnamed: 0,image_group_id,image_id_1,image_id_2,image_id_3,image_id_1_species_name,image_id_2_species_name,image_id_3_species_name,image_id_1_conf,image_id_2_conf,image_id_3_conf,image_id_1_count,image_id_2_count,image_id_3_count,image_id_1_bbox,image_id_2_bbox,image_id_3_bbox,image_id_1_blank,image_id_2_blank,image_id_3_blank,image_id_1_detectable,image_id_2_detectable,image_id_3_detectable,model_id
0,2008329,A,B,C,"deer,blank,bear,turkey,foxgray_foxred","deer,blank,turkey,bear,foxgray_foxred","deer,blank,bear,turkey,foxgray_foxred","0.8773717284202576,0.1166832223534584,0.002613...","0.697812020778656,0.2494659423828125,0.0286425...","0.9052568674087524,0.08971214294433594,0.00292...",0,0,0,0,0,0,False,False,False,False,False,False,4
1,2009625,A,B,C,"deer,coyote,wolf,elk,bear","deer,bear,wolf,blank,coyote","deer,bear,wolf,elk,coyote","0.8681831359863281,0.06680141389369965,0.01947...","0.806126594543457,0.08366353064775467,0.031845...","0.9082666039466858,0.03075997903943062,0.01432...",0,0,0,0,0,0,False,False,False,False,False,False,4
2,2010025,A,B,C,"blank,deer,turkey,coyote,bear","blank,deer,turkey,coyote,bear","blank,deer,turkey,bear,coyote","0.8910685181617737,0.06277047842741013,0.02514...","0.8242872357368469,0.12111835181713104,0.03019...","0.5800703763961792,0.34787309169769287,0.03111...",0,0,0,0,0,0,False,False,False,False,False,False,4
3,2010778,A,B,C,"blank,deer,turkey,bear,raccoon","blank,deer,turkey,bear,raccoon","blank,deer,turkey,bear,raccoon","0.7326779365539551,0.2117285132408142,0.033788...","0.5076081156730652,0.39624109864234924,0.06382...","0.6003864407539368,0.35645681619644165,0.02731...",0,0,0,0,0,0,False,False,False,False,False,False,4
4,2010824,A,B,C,"deer,blank,turkey,bear,raccoon","deer,blank,turkey,bear,raccoon","deer,blank,turkey,bear,raccoon","0.6423595547676086,0.2451985478401184,0.100142...","0.5525268316268921,0.2621527910232544,0.164025...","0.6749184131622314,0.17222897708415985,0.12502...",0,0,0,0,0,0,False,False,False,False,False,False,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,SSWI000000023457314,A,B,C,"coyote,wolf,raccoon,foxgray_foxred,blank","coyote,foxgray_foxred,raccoon,wolf,blank","coyote,foxgray_foxred,raccoon,wolf,blank","0.9990426898002625,0.00032826143433339894,0.00...","0.999133288860321,0.0005109732737764716,0.0001...","0.9994726777076721,0.00023759269970469177,0.00...",0,0,0,0,0,0,False,False,False,False,False,False,4
1577,SSWI000000023457319,A,B,C,"coyote,wolf,deer,foxgray_foxred,raccoon","coyote,wolf,deer,foxgray_foxred,raccoon","coyote,wolf,foxgray_foxred,deer,elk","0.5620468854904175,0.29316431283950806,0.08121...","0.6803741455078125,0.15470340847969055,0.09736...","0.7213326096534729,0.1749291718006134,0.041265...",0,0,0,0,0,0,False,False,False,False,False,False,4
1578,SSWI000000023494506,A,B,C,"foxgray_foxred,coyote,wolf,raccoon,opossum","foxgray_foxred,coyote,wolf,raccoon,deer","foxgray_foxred,wolf,raccoon,blank,coyote","0.9652422070503235,0.026077574118971825,0.0078...","0.9934090375900269,0.005748316179960966,0.0008...","0.6963462829589844,0.1407080739736557,0.075674...",0,0,0,0,0,0,False,False,False,False,False,False,4
1579,SSWI000000023514111,A,B,C,"foxgray_foxred,coyote,deer,elk,opossum","foxgray_foxred,deer,coyote,wolf,elk","foxgray_foxred,coyote,deer,wolf,cottontail_sno...","0.752059817314148,0.17690403759479523,0.061005...","0.8043835759162903,0.09995599091053009,0.07060...","0.6843248009681702,0.25628429651260376,0.05626...",0,0,0,0,0,0,False,False,False,False,False,False,4


In [76]:
p = pd.read_csv('full_model_output.csv')

In [78]:
old_columns = list(p.columns)

In [79]:
old_columns

['image_group_id',
 'image_id_1',
 'image_id_2',
 'image_id_3',
 'image_id_1_species_name',
 'image_id_2_species_name',
 'image_id_3_species_name',
 'image_id_1_conf',
 'image_id_2_conf',
 'image_id_3_conf',
 'image_id_1_bbox',
 'image_id_2_bbox',
 'image_id_3_bbox',
 'image_id_1_count',
 'image_id_2_count',
 'image_id_3_count',
 'image_id_1_blank',
 'image_id_2_blank',
 'image_id_3_blank',
 'model_id',
 'image_id_1_detectable',
 'image_id_2_detectable',
 'image_id_3_detectable',
 'load_date']