In [None]:
####
# Creator: Rowan Converse (rowanconverse@unm.edu)
# Date: 2022/08/02
# Purpose: Scratch, testing pieces of the full code to translate raw labels generated by USFWS biologists in Labelbox into COCO format
# Ref COCO Camera Trap Standard: https://cocodataset.org/#format-data
####

In [43]:
#Load necessary modules
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from collections import OrderedDict 
%matplotlib inline

#Load JSON file of Labelbox labels
path = r"/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/CV4Ecology/Prototyping/Data/Labels/originals/labelbox.json"
with open(path) as f:
  usfws = json.load(f)

In [78]:
shov1 = usfws[0]["Label"]["Northern Shoveler"][0]["geometry"][0]#['x']
shov2 = usfws[0]["Label"]["Northern Shoveler"][0]["geometry"]
#print(shov1)
#print(shov2)
xlist = []
for i in usfws[0]["Label"]["Northern Shoveler"][0]["geometry"]:
    xlist.append(list(i.values()))
print(xlist)

[[3549, 3147], [3564, 3165], [3604, 3111], [3579, 3101]]


In [103]:
categories = {}
geometries = []
coords = []
annos = []
for label in usfws:
  row = label["Label"]
for i in range(len(row)):
  if row == "Skip":
    continue
  for l in row:
    for key in row.keys():
      if key not in categories:
        categories[key] = len(categories) + 1
      cat_id = categories[key]
    for lbls in row.values():
      for geo in lbls:
        geometries.append(geo)
        for x in geo.values():
          coords.append(x)
          #xminymin = list(x[0].values())
          #xmaxymax = list(x[1].values)
          #bbox = xminymin + xmaxymax
          #annotation = {
          #'annotation_id': len(annos)+1,
          #'bbox': xminymin,
          #'area': area,
          #'category_id': cat_id
          #}
          #annos.append(annotation)

print("coordinate 0:" + str(coords[0]))

print("geometries 0:" +str(geometries[0]))
print(len(geometries))

coordinate 0:[{'x': 2190, 'y': 562}, {'x': 2228, 'y': 554}, {'x': 2282, 'y': 568}, {'x': 2362, 'y': 597}, {'x': 2379, 'y': 623}, {'x': 2377, 'y': 645}, {'x': 2334, 'y': 651}, {'x': 2260, 'y': 647}, {'x': 2202, 'y': 604}]
geometries 0:{'geometry': [{'x': 2190, 'y': 562}, {'x': 2228, 'y': 554}, {'x': 2282, 'y': 568}, {'x': 2362, 'y': 597}, {'x': 2379, 'y': 623}, {'x': 2377, 'y': 645}, {'x': 2334, 'y': 651}, {'x': 2260, 'y': 647}, {'x': 2202, 'y': 604}]}
2100


In [16]:
labelers = {}

for user in usfws:
  labeler = user["Created By"]
  if labeler not in labelers:
    labelers[labeler] = len(labelers) + 1
  labeler_id = labelers[labeler]

labelers

{'steven_sesnie@fws.gov': 1,
 'jude_smith@fws.gov': 2,
 'andrew_stetter@fws.gov': 3,
 'david.butler@tpwd.texas.gov': 4,
 'jeff_sanchez@fws.gov': 5,
 'stephen.mcdowell@tpwd.texas.gov': 6,
 'ronald_deroche@fws.gov': 7,
 'mbrasher@ducks.org': 8,
 'kammie_kruse@fws.gov': 9,
 'jena_moon@fws.gov': 10,
 'barry_wilson@fws.gov': 11,
 'dan_collins@fws.gov': 12,
 'bill_johnson@fws.gov': 13,
 'john_vradenburg@fws.gov': 14,
 'josh_vest@fws.gov': 15}

In [None]:
##ANNOTATIONS

#Pull out bounding boxes
images = {}
annos = []
categories = {}
labelers = {}
for i in range(len(usfws)):
  labeler = usfws.user_name[i]
  if labeler not in labelers:
    labelers[labeler] = len(labelers) + 1
  labeler_id = labelers[labeler]
  image_id = None
  try:
    imgrow = json.loads(usfws.subject_data[i])
  except:
    print('DEBUG')
    continue
  for key in imgrow.keys():
      try: 
        name = imgrow[key]["Filename"]
      except:
        print("DEBUG")
        continue
      if name not in images:
        images[name] = len(images) + 1
      
      image_id = images[name]
  try:
    row = json.loads(usfws["annotations"][i])
  except:
    print("DEBUG")
    continue
  for j in range(len(row)):
    if row[j]['task'] != 'T1':
        # task was not to draw a bounding box
        continue
    
    annlist = row[j]['value']
    for k in range(len(annlist)):
      if k == "null":
        continue
      ann = annlist[k]
      try:
        x = ann["x"]
      except:
        print("DEBUG")
        continue
      y = ann["y"]
      try:
        w = ann["width"]
      except:
        print("DEBUG")
        continue
      h = ann["height"]
      label = ann["tool_label"]
      bbox = [x, y, w, h]
      area = w*h

      if label not in categories:
        # label class has not yet been registered; add
        categories[label] = len(categories) + 1
      category_id = categories[label]
      annotation = {
        'annotation_id': len(annos)+1,
        'bbox': bbox,
        'area': area,
        'category_id': category_id,
        'image_id': image_id,
        'labeler_id': labeler_id
      }
      annos.append(annotation)
    
annos

DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG


[{'annotation_id': 1,
  'bbox': [465.84832763671875,
   203.70372009277344,
   61.20111083984375,
   117.00212097167969],
  'area': 7160.6597740845755,
  'category_id': 1,
  'image_id': 1,
  'labeler_id': 1},
 {'annotation_id': 2,
  'bbox': [271.44476318359375,
   16.50030517578125,
   66.60122680664062,
   102.60186767578125],
  'area': 6833.410259859636,
  'category_id': 1,
  'image_id': 1,
  'labeler_id': 1},
 {'annotation_id': 3,
  'bbox': [536.0496215820312,
   648.311767578125,
   97.2017822265625,
   52.200927734375],
  'area': 5074.023209661245,
  'category_id': 2,
  'image_id': 1,
  'labeler_id': 1},
 {'annotation_id': 4,
  'bbox': [426.2475891113281,
   831.9151611328125,
   52.200958251953125,
   81.00146484375],
  'area': 4228.3540846556425,
  'category_id': 2,
  'image_id': 1,
  'labeler_id': 1},
 {'annotation_id': 5,
  'bbox': [88.04139709472656,
   199.90350341796875,
   90.00163269042969,
   73.80136108398438],
  'area': 6642.2429923345335,
  'category_id': 2,
  'image_

In [18]:
images = {}
image_id = None
for file in usfws:
  name = file["External ID"]
  if name not in images:
    images[name] = len(images) + 1
  image_id = images[name]
images

{'BDA_12C_20181127_1.JPG': 1,
 'mxw_L13_20181215_1.JPG': 2,
 'BDA_18A4_20181106_1.JPG': 3,
 'BDA_18A4_20181106_4.JPG': 4,
 'BDA_18A4_20181107_3.JPG': 5,
 'BDA_12C_20181127_2.JPG': 6,
 'BDA_18A4_20181106_2.JPG': 7,
 'BDA_18A4_20181107_1.JPG': 8,
 'BDA_18A4_20181107_4.JPG': 9,
 'BDA_12C_20181127_3.JPG': 10,
 'BDA_18A4_20181106_3.JPG': 11,
 'BDA_18A4_20181107_2.JPG': 12,
 'BDA_24C_20181107_1.JPG': 13}

In [8]:
usfws[0]["Label"]["Canadian Goose"][0].values()

dict_values([[{'x': 4463, 'y': 2707}, {'x': 4553, 'y': 2766}, {'x': 4519, 'y': 2810}, {'x': 4428, 'y': 2744}]])

In [42]:
#usfws[0]["Label"].keys()
annos = []
categories = {}
for label in usfws:
  row = label["Label"]
for i in range(len(row)):
  if row == "Skip":
    continue
  for l in usfws[i]["Label"]:
    for r in usfws[i]["Label"].keys():
      if r not in categories:
        categories[r] = len(categories) + 1
      cat_id = categories[r]
    for lbls in usfws[i]["Label"].values():
      for geo in lbls:
        
        x1 = geo[0]
        y1 = geo[1]
        x2 = geo[2]
        y2 = geo[3]
        x3 = geo[4]
        y3 = geo[5]
        x4 = geo[6]
        y4 = geo[7]
      
        x = (x1 + x2) / 2
        y = (y1 + y2) / 2
        w = (x1 - x2)
        h = (y1 - y2)
        bbox = [x1, y1, x2, y2]
        area = w*h
        annotation = {
          "annotation_id": annID,
          "bbox": bbox, 
          "area": area,
          "category_id":cat_id,
        #  "image_id": image_id,
        #  "labeler_id": labeler_id
        }
        annos.append(annotation)
annos

[{'bbox': [{'geometry': [{'x': 4463, 'y': 2707},
     {'x': 4553, 'y': 2766},
     {'x': 4519, 'y': 2810},
     {'x': 4428, 'y': 2744}]},
   {'geometry': [{'x': 3879, 'y': 2093},
     {'x': 3826, 'y': 2103},
     {'x': 3890, 'y': 2129},
     {'x': 3914, 'y': 2102}]},
   {'geometry': [{'x': 2519, 'y': 3447},
     {'x': 2550, 'y': 3454},
     {'x': 2532, 'y': 3500},
     {'x': 2501, 'y': 3489}]},
   {'geometry': [{'x': 2844, 'y': 3023},
     {'x': 2889, 'y': 3049},
     {'x': 2905, 'y': 3024},
     {'x': 2852, 'y': 2999}]}],
  'category_id': 5},
 {'bbox': [{'geometry': [{'x': 3288, 'y': 223},
     {'x': 3363, 'y': 229},
     {'x': 3342, 'y': 353},
     {'x': 3281, 'y': 343}]},
   {'geometry': [{'x': 3879, 'y': 2093},
     {'x': 3826, 'y': 2103},
     {'x': 3890, 'y': 2129},
     {'x': 3914, 'y': 2102}]},
   {'geometry': [{'x': 2519, 'y': 3447},
     {'x': 2550, 'y': 3454},
     {'x': 2532, 'y': 3500},
     {'x': 2501, 'y': 3489}]},
   {'geometry': [{'x': 2844, 'y': 3023},
     {'x': 2889

In [None]:
##ANNOTATIONS: THIS IS THE MOST CORRECT ONE

#Pull out bounding boxes
images = {}
annos = []
categories = {}
labelers = {}

for row in usfws:
  labeler = row["Created By"]
  if labeler not in labelers:
    labelers[labeler] = len(labelers) + 1
  labeler_id = labelers[labeler]
  
  image_id = None
  name = row["External ID"]
  if name not in images:
    images[name] = len(images) + 1
  image_id = images[name]


  label = row["Label"]
  for i in range(len(label)):
    if row == "Skip":
      continue
    for x in usfws[i]["Label"]:
      for r in usfws[i]["Label"].keys():
        if r not in categories:
          categories[r] = len(categories) + 1
        cat_id = categories[r]

##Has been checked to here
  for j in range(len(row)):
    annlist = row[j]['value']
    for k in range(len(annlist)):
      ann = annlist[k]
      x = ann["x"]
      y = ann["y"]
      w = ann["width"]
      h = ann["height"]
      bbox = [x, y, w, h]
      area = w*h
#
      annotation = {
        'annotation_id': len(annos)+1,
        'bbox': bbox,
        'area': area,
        'category_id': cat_id,
        'image_id': image_id,
        'labeler_id': labeler_id
      }
      annos.append(annotation)  
  annos

In [164]:
#Image Statistics
images = list(set(img['External ID'] for img in usfws))
print('Number of Images: '+str(len(images)))

Number of Images: 13


In [36]:
#Labeler Statistics
userlist = list(set(user['Created By'] for user in usfws))
#print("Number of labelers: "+str(len(users)))
#print("Names of labelers: "+str(users))

userIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(userlist), 1)}
      [n] for n in userlist]
users = dict(zip(userIDs, userlist))


{1: 'jeff_sanchez@fws.gov',
 2: 'dan_collins@fws.gov',
 3: 'david.butler@tpwd.texas.gov',
 4: 'john_vradenburg@fws.gov',
 5: 'barry_wilson@fws.gov',
 6: 'andrew_stetter@fws.gov',
 7: 'bill_johnson@fws.gov',
 8: 'josh_vest@fws.gov',
 9: 'steven_sesnie@fws.gov',
 10: 'jena_moon@fws.gov',
 11: 'mbrasher@ducks.org',
 12: 'ronald_deroche@fws.gov',
 13: 'jude_smith@fws.gov',
 14: 'stephen.mcdowell@tpwd.texas.gov',
 15: 'kammie_kruse@fws.gov'}

In [77]:
#Number/Type of Species Categories
species_list = []
for i in range(len(usfws)):
    if usfws[i]["Label"] == "Skip":
        continue
    cur_species = list(usfws[i]["Label"].keys())
    species_list.extend(cur_species)

# remove duplicates
species_list = list(set(species_list))

print(f"Species List: {species_list}")
print(f"Number of species: {len(species_list)}")


Species List: ['American Wigeon' 'Canadian Goose' 'Gadwall' 'Mallard' 'Northern Pintail'
 'Northern Shoveler' 'Other' 'Readhead' 'Ringneck' 'Ruddy'
 'Sandhill Crane' 'Snow Goose' 'Teal']
Number of species: 13


In [119]:
#Number of Annotations
annotations = []
geos = []
xlist = []
ylist = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
              continue
       for x in usfws[i]["Label"]:
              for r in usfws[i]["Label"][x]:
                     annotations.append(x)
                     for y in r.values():
                            for geo in y:
                                   geos.append(geo)
                                   x = geo[0]
                                   xlist.append(x)
                                   #for x in geo:
                                   #       xlist.append(x[0])
                                          #ylist.append(x[1])

print(len(annotations))
print(len(xlist))
print(annotations[0])
print(geos[0])
print(xlist[0])

KeyError: 0

In [None]:
p2 = max([613,2007,1769,581]) = 2007
p1 = min([613,2007,1769,581]) = 581
q2 = max([1767,1674,2260,2395]) = 2395
q1 = min([1767,1674,2260,2395]) = 1674

In [171]:
#Skipped Images
skipped = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
        skipped.append(i)

print("Skipped Images: " +str(len(skipped)))

Skipped Images: 6


In [175]:
#Annotations: ID, Image ID, category ID, segmentation, area, bbox, iscrowd

#Pull out filename, labeler, label, geometry
annolist = []
for i in range(len(usfws)):
       if usfws[i]["Label"] == "Skip":
              continue
       filename = i['External ID']
       labeler = i["Created By"]
       for x in usfws[i]:
             for r in usfws[i]["Label"][x]:
              #here is where you pull out the geometries:
              x = []
              y = []
              w = []
              h = []
print(i)

#create annotation ID
annoIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(annolist), 1)}
      [n] for n in annolist]
anno = dict(zip(annoIDs, annolist))

[{'geometry': [{'x': 4463, 'y': 2707},
   {'x': 4553, 'y': 2766},
   {'x': 4519, 'y': 2810},
   {'x': 4428, 'y': 2744}]},
 {'geometry': [{'x': 4311, 'y': 2748},
   {'x': 4399, 'y': 2731},
   {'x': 4413, 'y': 2786},
   {'x': 4308, 'y': 2798}]},
 {'geometry': [{'x': 3707, 'y': 1827},
   {'x': 3762, 'y': 1862},
   {'x': 3817, 'y': 1794},
   {'x': 3773, 'y': 1761}]},
 {'geometry': [{'x': 3634, 'y': 1882},
   {'x': 3717, 'y': 1889},
   {'x': 3718, 'y': 1920},
   {'x': 3628, 'y': 1915}]},
 {'geometry': [{'x': 3669, 'y': 1931},
   {'x': 3702, 'y': 1927},
   {'x': 3738, 'y': 1997},
   {'x': 3696, 'y': 2009}]},
 {'geometry': [{'x': 3608, 'y': 1914},
   {'x': 3655, 'y': 1929},
   {'x': 3642, 'y': 1994},
   {'x': 3595, 'y': 1982}]},
 {'geometry': [{'x': 3839, 'y': 2136},
   {'x': 3811, 'y': 2177},
   {'x': 3729, 'y': 2115},
   {'x': 3769, 'y': 2083}]},
 {'geometry': [{'x': 3915, 'y': 2244},
   {'x': 3996, 'y': 2292},
   {'x': 4028, 'y': 2247},
   {'x': 3948, 'y': 2190}]},
 {'geometry': [{'x': 386

In [24]:
#info
whattimeisitrightnowdotcom = datetime.date.today()
year = {"year": 2022}
vers = {"version": "1.0"}
desc = {"description": "This dataset includes annotations of UAS imagery collected x to y , 2018, at Bosque del Apache National Wildlife Refuge in New Mexico. Fifteen biologists from the US Fish and Wildlife Service identified waterfowl in thirteen benchmark images to the species level. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository."}
contr = {"contributor": "Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse"}
url = {"url": "https://aspire.unm.edu/projects/project/ducks-and-drones.html"}
date = {"date created": whattimeisitrightnowdotcom}

infolist = [year, vers, desc, contr, url, date]

info = {"info": infolist}

info

{'info': [{'year': 2022},
  {'version': '1.0'},
  {'description': 'This dataset includes annotations of UAS imagery collected x to y , 2018, at Bosque del Apache National Wildlife Refuge in New Mexico. Fifteen biologists from the US Fish and Wildlife Service identified waterfowl in thirteen benchmark images to the species level. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository.'},
  {'contributor': 'Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse'},
  {'url': 'https://aspire.unm.edu/projects/project/ducks-and-drones.html'},
  {'date created': datetime.date(2022, 8, 2)}]}

In [33]:
#Images

#Derive list of images
for i in range(len(usfws)):
  imglist = list(set(img['External ID'] for img in usfws))

#Add unique IDs to each filename
imgIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(imglist), 1)}
      [n] for n in imglist]
img = dict(zip(imgIDs, imglist))
img

{1: 'BDA_12C_20181127_3.JPG',
 2: 'BDA_24C_20181107_1.JPG',
 3: 'BDA_18A4_20181107_2.JPG',
 4: 'BDA_12C_20181127_1.JPG',
 5: 'mxw_L13_20181215_1.JPG',
 6: 'BDA_12C_20181127_2.JPG',
 7: 'BDA_18A4_20181106_3.JPG',
 8: 'BDA_18A4_20181107_4.JPG',
 9: 'BDA_18A4_20181106_2.JPG',
 10: 'BDA_18A4_20181107_3.JPG',
 11: 'BDA_18A4_20181106_1.JPG',
 12: 'BDA_18A4_20181107_1.JPG',
 13: 'BDA_18A4_20181106_4.JPG'}

In [4]:
spplist = []
for i in range(len(usfws)):
  if usfws[i]["Label"] == "Skip":
    continue
  cur_species = list(usfws[i]["Label"].keys())
  spplist.extend(cur_species)
sppIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(spplist), 1)}
      [n] for n in spplist]
cat = dict(zip(sppIDs, spplist))
categories = {"categories": cat}
categories

{'categories': {1: 'Canadian Goose',
  2: 'Sandhill Crane',
  3: 'Mallard',
  4: 'Northern Pintail',
  5: 'Northern Shoveler',
  6: 'Teal',
  7: 'American Wigeon',
  8: 'Gadwall',
  9: 'Ringneck',
  10: 'Ruddy',
  11: 'Readhead',
  12: 'Other',
  13: 'Snow Goose'}}

In [38]:
#License
lic_id = {"id": 1} 
lic_name = {"name": "Creative Commons (CC)-BY"}
lic_url = {"url": "https://creativecommons.org/about/cclicenses/"}
licenselist = [lic_id, lic_name, lic_url]
license = {"license": licenselist}
license

{'license': [{'id': 1},
  {'name': 'Creative Commons (CC)-BY'},
  {'url': 'https://creativecommons.org/about/cclicenses/'}]}