In [None]:
####
# Creator: Rowan Converse (rowanconverse@unm.edu)
# Date: 2022/08/02
# Purpose: Scratch, testing pieces of the full code to translate raw labels generated by USFWS biologists in Labelbox into COCO format
# Ref COCO Camera Trap Standard: https://cocodataset.org/#format-data
####

In [2]:
#Load necessary modules
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from collections import OrderedDict 

#Load JSON file of Labelbox labels
path = r"/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/CV4Ecology/Prototyping/Data/Labels/labelbox.json"
with open(path) as f:
  usfws = json.load(f)

In [164]:
#Image Statistics
images = list(set(img['External ID'] for img in usfws))
print('Number of Images: '+str(len(images)))

Number of Images: 13


In [36]:
#Labeler Statistics
userlist = list(set(user['Created By'] for user in usfws))
#print("Number of labelers: "+str(len(users)))
#print("Names of labelers: "+str(users))

userIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(userlist), 1)}
      [n] for n in userlist]
users = dict(zip(userIDs, userlist))


{1: 'jeff_sanchez@fws.gov',
 2: 'dan_collins@fws.gov',
 3: 'david.butler@tpwd.texas.gov',
 4: 'john_vradenburg@fws.gov',
 5: 'barry_wilson@fws.gov',
 6: 'andrew_stetter@fws.gov',
 7: 'bill_johnson@fws.gov',
 8: 'josh_vest@fws.gov',
 9: 'steven_sesnie@fws.gov',
 10: 'jena_moon@fws.gov',
 11: 'mbrasher@ducks.org',
 12: 'ronald_deroche@fws.gov',
 13: 'jude_smith@fws.gov',
 14: 'stephen.mcdowell@tpwd.texas.gov',
 15: 'kammie_kruse@fws.gov'}

In [77]:
#Number/Type of Species Categories
species_list = []
for i in range(len(usfws)):
    if usfws[i]["Label"] == "Skip":
        continue
    cur_species = list(usfws[i]["Label"].keys())
    species_list.extend(cur_species)

# remove duplicates
species_list = list(set(species_list))

print(f"Species List: {species_list}")
print(f"Number of species: {len(species_list)}")


Species List: ['American Wigeon' 'Canadian Goose' 'Gadwall' 'Mallard' 'Northern Pintail'
 'Northern Shoveler' 'Other' 'Readhead' 'Ringneck' 'Ruddy'
 'Sandhill Crane' 'Snow Goose' 'Teal']
Number of species: 13


In [3]:
#Number of Annotations
annotations = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
              continue
       for x in usfws[i]["Label"]:
              for r in usfws[i]["Label"][x]:
                annotations.append(r)

print(len(annotations))

19336


In [171]:
#Skipped Images
skipped = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
        skipped.append(i)

print("Skipped Images: " +str(len(skipped)))

Skipped Images: 6


In [175]:
#Annotations: ID, Image ID, category ID, segmentation, area, bbox, iscrowd

#Pull out filename, labeler, label, geometry
annolist = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
              continue
       filename = i['External ID']
       labeler = i["Created By"]
       for x in usfws[i]:
             for r in usfws[i]["Label"][x]:
              #here is where you pull out the geometries:
              x = []
              y = []
              w = []
              h = []
print(i)

#create annotation ID
annoIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(annolist), 1)}
      [n] for n in annolist]
anno = dict(zip(annoIDs, annolist))

[{'geometry': [{'x': 4463, 'y': 2707},
   {'x': 4553, 'y': 2766},
   {'x': 4519, 'y': 2810},
   {'x': 4428, 'y': 2744}]},
 {'geometry': [{'x': 4311, 'y': 2748},
   {'x': 4399, 'y': 2731},
   {'x': 4413, 'y': 2786},
   {'x': 4308, 'y': 2798}]},
 {'geometry': [{'x': 3707, 'y': 1827},
   {'x': 3762, 'y': 1862},
   {'x': 3817, 'y': 1794},
   {'x': 3773, 'y': 1761}]},
 {'geometry': [{'x': 3634, 'y': 1882},
   {'x': 3717, 'y': 1889},
   {'x': 3718, 'y': 1920},
   {'x': 3628, 'y': 1915}]},
 {'geometry': [{'x': 3669, 'y': 1931},
   {'x': 3702, 'y': 1927},
   {'x': 3738, 'y': 1997},
   {'x': 3696, 'y': 2009}]},
 {'geometry': [{'x': 3608, 'y': 1914},
   {'x': 3655, 'y': 1929},
   {'x': 3642, 'y': 1994},
   {'x': 3595, 'y': 1982}]},
 {'geometry': [{'x': 3839, 'y': 2136},
   {'x': 3811, 'y': 2177},
   {'x': 3729, 'y': 2115},
   {'x': 3769, 'y': 2083}]},
 {'geometry': [{'x': 3915, 'y': 2244},
   {'x': 3996, 'y': 2292},
   {'x': 4028, 'y': 2247},
   {'x': 3948, 'y': 2190}]},
 {'geometry': [{'x': 386

In [24]:
#info
whattimeisitrightnowdotcom = datetime.date.today()
year = {"year": 2022}
vers = {"version": "1.0"}
desc = {"description": "This dataset includes annotations of UAS imagery collected x to y , 2018, at Bosque del Apache National Wildlife Refuge in New Mexico. Fifteen biologists from the US Fish and Wildlife Service identified waterfowl in thirteen benchmark images to the species level. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository."}
contr = {"contributor": "Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse"}
url = {"url": "https://aspire.unm.edu/projects/project/ducks-and-drones.html"}
date = {"date created": whattimeisitrightnowdotcom}

infolist = [year, vers, desc, contr, url, date]

info = {"info": infolist}

info

{'info': [{'year': 2022},
  {'version': '1.0'},
  {'description': 'This dataset includes annotations of UAS imagery collected x to y , 2018, at Bosque del Apache National Wildlife Refuge in New Mexico. Fifteen biologists from the US Fish and Wildlife Service identified waterfowl in thirteen benchmark images to the species level. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository.'},
  {'contributor': 'Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse'},
  {'url': 'https://aspire.unm.edu/projects/project/ducks-and-drones.html'},
  {'date created': datetime.date(2022, 8, 2)}]}

In [33]:
#Images

#Derive list of images
for i in range(len(usfws)):
  imglist = list(set(img['External ID'] for img in usfws))

#Add unique IDs to each filename
imgIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(imglist), 1)}
      [n] for n in imglist]
img = dict(zip(imgIDs, imglist))
img

{1: 'BDA_12C_20181127_3.JPG',
 2: 'BDA_24C_20181107_1.JPG',
 3: 'BDA_18A4_20181107_2.JPG',
 4: 'BDA_12C_20181127_1.JPG',
 5: 'mxw_L13_20181215_1.JPG',
 6: 'BDA_12C_20181127_2.JPG',
 7: 'BDA_18A4_20181106_3.JPG',
 8: 'BDA_18A4_20181107_4.JPG',
 9: 'BDA_18A4_20181106_2.JPG',
 10: 'BDA_18A4_20181107_3.JPG',
 11: 'BDA_18A4_20181106_1.JPG',
 12: 'BDA_18A4_20181107_1.JPG',
 13: 'BDA_18A4_20181106_4.JPG'}

In [4]:
spplist = []
for i in range(len(usfws)):
  if usfws[i]["Label"] == "Skip":
    continue
  cur_species = list(usfws[i]["Label"].keys())
  spplist.extend(cur_species)
sppIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(spplist), 1)}
      [n] for n in spplist]
cat = dict(zip(sppIDs, spplist))
categories = {"categories": cat}
categories

{'categories': {1: 'Canadian Goose',
  2: 'Sandhill Crane',
  3: 'Mallard',
  4: 'Northern Pintail',
  5: 'Northern Shoveler',
  6: 'Teal',
  7: 'American Wigeon',
  8: 'Gadwall',
  9: 'Ringneck',
  10: 'Ruddy',
  11: 'Readhead',
  12: 'Other',
  13: 'Snow Goose'}}

In [38]:
#License
lic_id = {"id": 1} 
lic_name = {"name": "Creative Commons (CC)-BY"}
lic_url = {"url": "https://creativecommons.org/about/cclicenses/"}
licenselist = [lic_id, lic_name, lic_url]
license = {"license": licenselist}
license

{'license': [{'id': 1},
  {'name': 'Creative Commons (CC)-BY'},
  {'url': 'https://creativecommons.org/about/cclicenses/'}]}

In [None]:
#Annotations per Species
ann_spp = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
              continue
       for x in usfws[i]["Label"]:
              for r in usfws[i]["Label"][x]:
                     
#Label is a dictionary
#Each species is a list of geometries, which are dictionaries 
#need to count all geometries associated with each species

In [19]:
#Filtered mean of counts per image

#Remember to make a new derived dataset that is the filtered counts-- take the minimum area of overlapping boxes

dict

In [None]:
#Plurality vote of identifications, each bounding box overlapping > 0.3 (?), both the filtered dataset and the raw  