In [4]:
####
# Creator: Rowan Converse (rowanconverse@unm.edu)
# Date: 2022/08/02
# Purpose: Scratch, testing pieces of the full code to translate raw labels generated by volunteers on the Zooniverse platform into COCO format
# Ref COCO Camera Trap Standard: https://cocodataset.org/#format-data
####

In [1]:
#Load packages
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from collections import OrderedDict 
import torch
import torchvision
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import box_area

#Load data
path = r"/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/CV4Ecology/Prototyping/Data/Labels/originals/20221212_dronesforducks.csv"
zooniverse = pd.read_csv(path)
#zooniverse = pd.read_csv("/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/CV4Ecology/Prototyping/Data/Labels/zooniverse_snippet.csv")

In [4]:
zooniverse.head()

Unnamed: 0,classification_id,user_name,user_id,user_ip,workflow_id,workflow_name,workflow_version,created_at,gold_standard,expert,metadata,annotations,subject_data,subject_ids
0,299391202,rowan_aspire,2215751.0,1ec03e604526af9cc008,17287.0,Duck/Goose/Crane,34.39,2021-01-05 21:42:47 UTC,,,"{""source"":""api"",""session"":""15e8c6c801b72d6bfd7...","[{""task"":""T0"",""task_label"":""Do you see birds i...","{""54652402"":{""retired"":null,""Filename"":""BDA_12...",54652402.0
1,299410573,rowan_aspire,2215751.0,1a1eefb58b2c8cb65d09,17287.0,Duck/Goose/Crane,34.39,2021-01-05 22:49:13 UTC,,,"{""source"":""api"",""session"":""3ff627d3c29c1bb0f36...","[{""task"":""T0"",""task_label"":""Do you see birds i...","{""54652421"":{""retired"":null,""Filename"":""BDA_12...",54652421.0
2,299415329,rowan_aspire,2215751.0,72fe8ea7234b477eba41,17287.0,Duck/Goose/Crane,35.4,2021-01-05 23:06:39 UTC,,,"{""source"":""api"",""session"":""6b97d1dd59d7f200bbc...","[{""task"":""T1"",""task_label"":""Draw a rectangle a...","{""54652440"":{""retired"":null,""Filename"":""BDA_18...",54652440.0
3,300919845,rowan_aspire,2215751.0,1ec03e604526af9cc008,17287.0,Duck/Goose/Crane,35.45,2021-01-11 16:20:04 UTC,,,"{""source"":""api"",""session"":""6a70be793547652e5be...","[{""task"":""T1"",""task_label"":""Draw a rectangle a...","{""54652428"":{""retired"":null,""Filename"":""BDA_18...",54652428.0
4,300921066,rowan_aspire,2215751.0,1ec03e604526af9cc008,17287.0,Duck/Goose/Crane,35.45,2021-01-11 16:24:19 UTC,,,"{""source"":""api"",""session"":""6a70be793547652e5be...","[{""task"":""T1"",""task_label"":""Draw a rectangle a...","{""54652440"":{""retired"":null,""Filename"":""BDA_18...",54652440.0


In [2]:
##ANNOTATIONS

#Pull out bounding boxes
images = {}
annos = []
categories = {}
labelers = {}
for i in range(len(zooniverse)):
  labeler = zooniverse.user_name[i]
  if labeler not in labelers:
    labelers[labeler] = len(labelers) + 1
  labeler_id = labelers[labeler]
  image_id = None
  try:
    imgrow = json.loads(zooniverse.subject_data[i])
  except:
    print('DEBUG')
    continue
  for key in imgrow.keys():
      try: 
        name = imgrow[key]["Filename"]
      except:
        print("DEBUG")
        continue
      if name not in images:
        images[name] = len(images) + 1
      
      image_id = images[name]
  try:
    row = json.loads(zooniverse["annotations"][i])
  except:
    print("DEBUG")
    continue
  for j in range(len(row)):
    if row[j]['task'] != 'T1':
        # task was not to draw a bounding box
        continue
    
    annlist = row[j]['value']
    for k in range(len(annlist)):
      if k == "null":
        continue
      ann = annlist[k]
      try:
        x = ann["x"]
      except:
        print("DEBUG")
        continue
      y = ann["y"]
      try:
        w = ann["width"]
      except:
        print("DEBUG")
        continue
      h = ann["height"]
      label = ann["tool_label"]
      bbox = [x, y, w, h]
      area = w*h

      if label not in categories:
        # label class has not yet been registered; add
        categories[label] = len(categories) + 1
      category_id = categories[label]
      annotation = {
        'annotation_id': len(annos)+1,
        'bbox': bbox,
        'area': area,
        'category_id': category_id,
        'category': label,
        'image_id': image_id,
        'filename': name,
        'labeler_id': labeler_id
      }
      annos.append(annotation)
    
annos

DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG
DEBUG


[{'annotation_id': 1,
  'bbox': [465.84832763671875,
   203.70372009277344,
   61.20111083984375,
   117.00212097167969],
  'area': 7160.6597740845755,
  'category_id': 1,
  'category': 'Crane',
  'image_id': 1,
  'filename': 'BDA_12C_20181127_1 copy.jpg',
  'labeler_id': 1},
 {'annotation_id': 2,
  'bbox': [271.44476318359375,
   16.50030517578125,
   66.60122680664062,
   102.60186767578125],
  'area': 6833.410259859636,
  'category_id': 1,
  'category': 'Crane',
  'image_id': 1,
  'filename': 'BDA_12C_20181127_1 copy.jpg',
  'labeler_id': 1},
 {'annotation_id': 3,
  'bbox': [536.0496215820312,
   648.311767578125,
   97.2017822265625,
   52.200927734375],
  'area': 5074.023209661245,
  'category_id': 2,
  'category': 'Goose',
  'image_id': 1,
  'filename': 'BDA_12C_20181127_1 copy.jpg',
  'labeler_id': 1},
 {'annotation_id': 4,
  'bbox': [426.2475891113281,
   831.9151611328125,
   52.200958251953125,
   81.00146484375],
  'area': 4228.3540846556425,
  'category_id': 2,
  'category'

In [3]:
with open("20221212_zooniverse_coco.json", "w") as outfile:
    json.dump(annos, outfile)

In [8]:
###INFO
whattimeisitrightnowdotcom = datetime.date.today()
year = {"year": 2022}
vers = {"version": "1.0"}
desc = {"description": "This dataset includes annotations of UAS imagery collected Nov 11th to 27th, 2018, over eight flights / three field sessions at Bosque del Apache National Wildlife Refuge in New Mexico. Over 3,000 volunteers identified waterfowl in 611 images using three morphological categories (Duck, Goose, Crane). The drone platform was a DJI Mavic Pro 2 with Hassleblad L1D-20c sensor collecting in RGB, flown at altitudes ranging 20 - 40m AGL. Image labels were collected via the participatory science platform Zooniverse. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository."}
contr = {"contributor": "Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse"}
url = {"url": "https://aspire.unm.edu/projects/project/ducks-and-drones.html"}
date = {"date created": whattimeisitrightnowdotcom}

infolist = [year, vers, desc, contr, url, date]

info = {"info": infolist}
info

{'info': [{'year': 2022},
  {'version': '1.0'},
  {'description': 'This dataset includes annotations of UAS imagery collected x to y , 2018, at Bosque del Apache National Wildlife Refuge in New Mexico. Over 3,000 volunteers identified waterfowl in 611 images using three morphological categories (Duck, Goose, Crane). The labels were collected via the participatory science platform, Zooniverse. Contact Rowan Converse (rowanconverse@unm.edu) with questions about this dataset. Please cite using a CC-By license with ASPIRE as the data repository.'},
  {'contributor': 'Center for the Advancement of Spatial Informatics Research and Education (ASPIRE), University of New Mexico; Project Manager Rowan Converse'},
  {'url': 'https://aspire.unm.edu/projects/project/ducks-and-drones.html'},
  {'date created': datetime.date(2022, 8, 3)}]}

In [89]:
###IMAGE
#Derive list of images
imglist = []
for i in range(len(zooniverse)):
    row = json.loads(zooniverse.subject_data[i])
    for key in row.keys():
        name = row[key]["Filename"]
        imglist.append(name)
imglist = set(imglist)
img_pairs = list(map(lambda p: ("file_name", p), imglist))

#Add unique IDs to each filename
imgIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(img_pairs), 1)}
      [n] for n in img_pairs]
imgIDs = list(map(lambda m: ("id", m), imgIDs))
img = dict(zip(imgIDs, img_pairs))
images = {"images": img}
images


{'images': {('id', 1): ('file_name', 'BDA_unknown_20181106_1_00075_01_02.png'),
  ('id', 2): ('file_name', 'FWS_MaxwellLake13_20171215_3_01693_06_07.png'),
  ('id', 3): ('file_name', 'BDA_unknown_20181106_1_00047_04_01.png'),
  ('id', 4): ('file_name', 'FWS_MaxwellLake13_20171215_2_00873_03_05.png'),
  ('id', 5): ('file_name', 'BDA_18a4_20181106_3_00748_01_06.png'),
  ('id', 6): ('file_name', 'BDA_unknown_20181106_1_00151_05_07.png'),
  ('id', 7): ('file_name', 'BDA_18a4_20181106_2_00345_04_01.png'),
  ('id', 8): ('file_name', 'BDA_18a4_20181106_2_00352_06_04.png'),
  ('id', 9): ('file_name', 'BDA_18a4_20181106_2_00516_02_04.png'),
  ('id', 10): ('file_name', 'BDA_unknown_20181106_1_00100_01_08.png'),
  ('id', 11): ('file_name', 'BDA_18a4_20181106_3_00721_07_06.png'),
  ('id', 12): ('file_name', 'BDA_18a4_20181106_3_00801_07_04.png'),
  ('id', 13): ('file_name', 'BDA_18a4_20181106_2_00306_02_02.png'),
  ('id', 14): ('file_name', 'BDA_18a4_20181106_3_00743_06_08.png'),
  ('id', 15): ('f

In [91]:
annIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(annos), 1)}
      [n] for n in annos]
annIDs = list(map(lambda m: ("id", m), annIDs))
anns = dict(zip(annIDs, annos))
annotations = {"annotations": anns}
annotations

TypeError: unhashable type: 'list'

In [None]:
##ANNOTATIONS

#Pull out bounding boxes
annos = []
for i in range(len(zooniverse)):
  row = json.loads(zooniverse.annotations[i])
  for j in range(len(row)):
    if row[j]['task'] != 'T1':
        # task was not to draw a bounding box
        continue
    annlist = row[j]['value']
    for k in range(len(annlist)):
      ann = annlist[k]
      x = ann["x"]
      y = ann["y"]
      w = ann["width"]
      h = ann["height"]
      label = ann["tool_label"]
      label_a = {"category": label}
      bbox = [x, y, w, h]
      bbox_a = {"bbox": bbox}
      area = w*h
      area_a = {"area": area}
      annotation = [label_a, area_a, bbox_a]
      annos.append(annotation)

annos

In [None]:
#Assigning individual IDs to each annotation
annIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(annos), 1)}
      [n] for n in annos]
annpairs = dict(zip(annIDs, annos))
annotations = {"annotations": annpairs}
annotations


In [87]:
#Category List

spplist = []
for i in range(len(zooniverse)):
  row = json.loads(zooniverse.annotations[i])
  for j in range(len(row)):
    if row[j]['task'] != 'T1':
        # task was not to draw a bounding box
        continue
    annlist = row[j]['value']
    for k in range(len(annlist)):
      ann = annlist[k]
      label = ann["tool_label"]
      spplist.append(label)

spplist = set(spplist)
spp_pairs = list(map(lambda s: ("name", s), spplist))

sppIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(spp_pairs), 1)}
      [n] for n in spp_pairs]
sppIDs = list(map(lambda s: ("id", s), sppIDs))
cat = dict(zip(sppIDs, spp_pairs))
cat
categories = {"categories": cat}
categories

{'categories': {('id', 1): ('name', 'Duck'),
  ('id', 2): ('name', 'Other Bird'),
  ('id', 3): ('name', 'Goose'),
  ('id', 4): ('name', 'Crane')}}

In [79]:
#Individual labelers
userlist = list(set(zooniverse.user_id))
user_pairs = list(map(lambda u: ("name", u), userlist))

userIDs = [{v: k for k, v in enumerate(
   OrderedDict.fromkeys(userlist), 1)}
      [n] for n in userlist]
userIDs = list(map(lambda u: ("id", u), userIDs))
labelers = dict(zip(userIDs, user_pairs))
labelers

labelinfo = {"labelers": labelers}
labelinfo
#print("Number of Labelers: "+str(len(users.unique())))

{'labelers': {('id', 1): ('name', 2313184),
  ('id', 2): ('name', 2263329),
  ('id', 3): ('name', 1822173),
  ('id', 4): ('name', 2316323),
  ('id', 5): ('name', 2141263),
  ('id', 6): ('name', 2247823),
  ('id', 7): ('name', 2313201),
  ('id', 8): ('name', 2313202),
  ('id', 9): ('name', 2154838),
  ('id', 10): ('name', 2091290),
  ('id', 11): ('name', 2142076),
  ('id', 12): ('name', 2316349),
  ('id', 13): ('name', 2316350)}}

In [7]:
##LICENSE
lic_id = {"id": 1} 
lic_name = {"name": "Creative Commons (CC)-BY"}
lic_url = {"url": "https://creativecommons.org/about/cclicenses/"}
licenselist = [lic_id, lic_name, lic_url]
license = {"license": licenselist}
license

{'license': [{'id': 1},
  {'name': 'Creative Commons (CC)-BY'},
  {'url': 'https://creativecommons.org/about/cclicenses/'}]}

In [90]:
##COMBINING INTO ONE JSON
coco = {info, images, annotations, categories, labelers, license}
coco

NameError: name 'annotations' is not defined

In [None]:
#Benni's example: load first row's annotations
row0_anno = json.loads(zooniverse.annotations[0])
print('Row 0: number of annotations = {}'.format(len(row0_anno)))

first_anno = row0_anno[0]['value'][0]    # extract the first of two annotations of the first row, then its properties under 'value' (dict), and again the first entry (that property is a list)
print('First annotation: bbox = {}, {}, {}, {}; label = {}'.format(
    first_anno['x'], first_anno['y'],
    first_anno['width'], first_anno['height'],
    first_anno['tool_label']
))

In [23]:
#Statistics about images

images = zooniverse.subject_ids
print("Number of Images: "+str(len(images.unique())))

#print('Animal images: '+str(len([ann['value'] for ann in anno if ann['value'] != 0])))
#print('Empty images: '+str(len([ann['value'] for ann in anno if ann['value'] == 0])))

Number of Images: 282


In [74]:
#experimenting with pulling out the label name
row10 = json.loads(zooniverse.annotations[10])
an0row10 = row10[0]['value'][0]
duck = an0row10["tool_label"] 
print(duck)

Duck


In [120]:
#test-- pulling out annotations
for i in range(len(zooniverse)):
  row = json.loads(zooniverse.annotations[i])
  for j in range(len(row)):
    if row[j]['task'] != 'T1':
        # task was not to draw a bounding box
        continue
    annlist = row[j]['value']
    for k in range(len(annlist)):
      ann = annlist[k]
      x = ann["x"]
      y = ann["y"]
      w = ann["width"]
      h = ann["height"]
      label = ann["tool_label"]
      anno = [label, x, y, w, h]
      print(anno)
      
#spp = set(spplist)

['Duck', 575.9765625, 265.39453125, 79.1796875, 94.40625]
['Duck', 185.109375, 124.38671875, 62.6328125, 30.52734375]
['Duck', 5.06640625, 223.09765625, 42.37109375, 51.4453125]
['Duck', 1.75390625, 317.02734375, 53.32421875, 56.390625]
['Duck', 368.42578125, 149.796875, 61.16015625, 36.3046875]
['Duck', 322.69921875, 39.5390625, 81.23046875, 34.07421875]
['Duck', 509.6328125, 189.3984375, 70.2265625, 31.53515625]
['Duck', 490.140625, 261.484375, 76.5390625, 27.76171875]
['Duck', 402.88671875, 329.359375, 90.72265625, 40.3671875]
['Duck', 325.5478210449219, 270.52587890625, 73.85104370117188, 72.52145385742188]
['Duck', 336.7303161621094, 401.5649719238281, 61.29571533203125, 93.30828857421875]
['Duck', 207.77734375, 2.83984375, 49.7421875, 16.66015625]
['Duck', 225.41796875, 52.8046875, 32.66796875, 32.36328125]
['Duck', 269.51171875, 37.29296875, 30.5234375, 32.2265625]
['Duck', 458.16015625, 3.140625, 16.49609375, 9.37890625]
['Duck', 453.125, 40.58984375, 29.5078125, 39.984375]
['D

In [None]:
#Test-- pulling out the filename 
subj = json.loads(zooniverse.subject_data[0])
subj["60189910"]["Filename"]

'BDA_18a4_20181106_2_00425_05_06.png'