In [19]:
####
# Creator: Rowan Converse (rowanconverse@unm.edu)
# Date: 2022/08/02
# Purpose: Translate raw labels generated by volunteers on the participatory science platform Zooniverse into COCO format for public release 
# Ref COCO Camera Trap Standard: https://cocodataset.org/#format-data
####

#Load necessary modules
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from collections import OrderedDict 

#Load data
path = r"/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Projects/CameraTraps/Labels/Scratch/rowanraw_20221130.csv"
zooniverse = pd.read_csv(path)

In [43]:
my20 = json.loads(zooniverse.annotations[13])
my20[1]["value"][1]

{'x': 0.59912109375,
 'y': 453.5413818359375,
 'tool': 0,
 'frame': 0,
 'width': 254.98313903808594,
 'height': 248.5550537109375,
 'details': [],
 'tool_label': 'Animal'}

In [47]:
bboxes = []
#testanno = []
for i in range(len(zooniverse)):
  row = json.loads(zooniverse.annotations[i])
  if row[i]['task'] == 'T1':
    boxlist = row[j]['value']
    for k in range(len(boxlist)):
        box = boxlist[k]
        x = box["x"]
        y = box["y"]
        w = box["width"]
        h = box["height"]
        bbox = [x, y, w, h]
        bbox.append(bboxes)
  #annotation = {
  #      'annotation_id': len(bboxes)+1,
  #      'bbox': bbox
  #    }
  #testanno.append(annotation)

len(bboxes)

IndexError: list index out of range

In [44]:
###Annotations: import ID, Image ID, Category ID, bounding boxes (x,y, width, height). 
images = {}
annos = []
categories = {}

for i in range(len(zooniverse)):
  image_id = None
  subject = zooniverse.subject_ids[i]
  imgrow = json.loads(zooniverse.subject_data[i])
  for key in imgrow.keys():
      name = imgrow[key]["Filename"]
      if name not in images:
        images[name] = len(images) + 1
      image_id = images[name]
      row = json.loads(zooniverse["annotations"][i])
      for j in range(len(row)):
        if row[j]['task'] == 'T1':
            annlist = row[j]['value']
            for k in range(len(annlist)):
                ann = annlist[k]
                x = ann["x"]
                y = ann["y"]
                w = ann["width"]
                h = ann["height"]
                bbox = [x, y, w, h]
        if row[j]['task'] == 'T0': 
          labelinfo = row[j]['value']
          for k in range(len(labelinfo)):
            lbl = labelinfo[k]
            label = lbl["choice"]
            if label not in categories:
   #label class has not yet been registered; add
                  categories[label] = len(categories) + 1
                  category_id = categories[label]
      annotation = {
        'annotation_id': len(annos)+1,
        'bbox': bbox,
        'category_id': category_id,
        'class': label,
        'image_id': image_id,
        'filename': name,
        'subjectID': subject
      }
      annos.append(annotation)

In [45]:
annos

[{'annotation_id': 1,
  'bbox': [1461.9310302734375,
   498.5384521484375,
   216.414306640625,
   186.416259765625],
  'category_id': 1,
  'class': 'MULEDEER',
  'image_id': 1,
  'filename': 'IMG_0058.JPG',
  'subjectID': 83030587},
 {'annotation_id': 2,
  'bbox': [1142.66650390625,
   470.68316650390625,
   304.2655029296875,
   304.26556396484375],
  'category_id': 2,
  'class': 'PRONGHORN',
  'image_id': 2,
  'filename': 'IMG_1502.JPG',
  'subjectID': 83032065},
 {'annotation_id': 3,
  'bbox': [2.7418365478515625,
   644.2431030273438,
   201.41525268554688,
   329.97821044921875],
  'category_id': 2,
  'class': 'PRONGHORN',
  'image_id': 3,
  'filename': 'nuun1_0421.JPG',
  'subjectID': 83030430},
 {'annotation_id': 4,
  'bbox': [69.166015625, 522.1082763671875, 68.56689453125, 55.71063232421875],
  'category_id': 3,
  'class': 'COYOTE',
  'image_id': 4,
  'filename': 'IMG_0819.JPG',
  'subjectID': 83031348},
 {'annotation_id': 5,
  'bbox': [1022.6743774414062,
   669.955688476562

In [46]:
len(annos)

2073

In [26]:
for i in range(len(zooniverse)):
   row = json.loads(zooniverse["annotations"][i])
   if row[j]['task'] == 'T0': 
      labelinfo = row[j]['value']
      for k in range(len(labelinfo)):
            lbl = labelinfo[k]
            label = lbl["choice"]
            if label not in categories:
   #label class has not yet been registered; add
                  categories[label] = len(categories) + 1
                  category_id = categories[label]
      annotation = {
        'annotation_id': len(annos)+1,
        'category_id': category_id,
        'class': label,
      }
      annos.append(annotation)
annos

[{'annotation_id': 1, 'category_id': 1, 'class': 'MULEDEER'},
 {'annotation_id': 2, 'category_id': 2, 'class': 'PRONGHORN'},
 {'annotation_id': 3, 'category_id': 4, 'class': 'VEHICLE'},
 {'annotation_id': 4, 'category_id': 4, 'class': 'HUMAN'},
 {'annotation_id': 5, 'category_id': 4, 'class': 'HUMAN'},
 {'annotation_id': 6, 'category_id': 4, 'class': 'VEHICLE'},
 {'annotation_id': 7, 'category_id': 5, 'class': 'AFRICANORYX'},
 {'annotation_id': 8, 'category_id': 5, 'class': 'AFRICANORYX'},
 {'annotation_id': 9, 'category_id': 5, 'class': 'MULEDEER'},
 {'annotation_id': 10, 'category_id': 5, 'class': 'PRONGHORN'},
 {'annotation_id': 11, 'category_id': 5, 'class': 'PRONGHORN'},
 {'annotation_id': 12, 'category_id': 5, 'class': 'MULEDEER'},
 {'annotation_id': 13, 'category_id': 5, 'class': 'AFRICANORYX'},
 {'annotation_id': 14, 'category_id': 6, 'class': 'ELK'},
 {'annotation_id': 15, 'category_id': 6, 'class': 'VEHICLE'},
 {'annotation_id': 16, 'category_id': 7, 'class': 'JAVELINA'},
 {'

In [9]:
#load camera ID
camID = pd.read_csv(r'/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Projects/CameraTraps/Labels/rowanID_camerainfo')
camID.head()

Unnamed: 0.1,Unnamed: 0,image_id,filename,camera
0,0,83030012,nuun1_0546.JPG,109891
1,1,83030013,nuun1_0547.JPG,109891
2,2,83030014,nuun1_0548.JPG,109891
3,3,83030015,nuun1_0549.JPG,109891
4,4,83030016,nuun1_0550.JPG,109891


In [16]:
sev = pd.DataFrame(annos)
sev.head()

Unnamed: 0,annotation_id,bbox,category_id,class,image_id,filename,subjectID
0,1,"[735.9075317382812, 462.56317138671875, 101.02...",1,MULEDEER,1,IMG_1203.JPG,74583293
1,2,"[1568.1363525390625, 467.3737487792969, 336.73...",2,VEHICLE,2,IMG_0044.JPG,74583836
2,3,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,3,IMG_0606.JPG,74582696
3,4,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,4,IMG_1526.JPG,74583616
4,5,"[1615.500732421875, 475.8211669921875, 181.638...",3,MULEDEER,5,IMG_1334.JPG,74583424


In [17]:
addcams = pd.merge(sev, camID, how='left', left_on=["subjectID"], right_on=["image_id"])
addcams.head()

Unnamed: 0.1,annotation_id,bbox,category_id,class,image_id_x,filename_x,subjectID,Unnamed: 0,image_id_y,filename_y,camera
0,1,"[735.9075317382812, 462.56317138671875, 101.02...",1,MULEDEER,1,IMG_1203.JPG,74583293,0.0,74583293.0,IMG_1203.JPG,103602.0
1,2,"[1568.1363525390625, 467.3737487792969, 336.73...",2,VEHICLE,2,IMG_0044.JPG,74583836,1.0,74583836.0,IMG_0044.JPG,
2,3,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,3,IMG_0606.JPG,74582696,2.0,74582696.0,IMG_0606.JPG,103602.0
3,4,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,4,IMG_1526.JPG,74583616,3.0,74583616.0,IMG_1526.JPG,103602.0
4,5,"[1615.500732421875, 475.8211669921875, 181.638...",3,MULEDEER,5,IMG_1334.JPG,74583424,4.0,74583424.0,IMG_1334.JPG,103602.0


In [27]:
cleancams = addcams.drop(['image_id_y', 'filename_y', 'Unnamed: 0'], axis=1)
cleancams = cleancams.rename({'image_id_x': 'imageID', 'filename_x': 'filename'}, axis=1)
cleancams.head()

Unnamed: 0,annotation_id,bbox,category_id,class,imageID,filename,subjectID,camera
0,1,"[735.9075317382812, 462.56317138671875, 101.02...",1,MULEDEER,1,IMG_1203.JPG,74583293,103602.0
1,2,"[1568.1363525390625, 467.3737487792969, 336.73...",2,VEHICLE,2,IMG_0044.JPG,74583836,
2,3,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,3,IMG_0606.JPG,74582696,103602.0
3,4,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,4,IMG_1526.JPG,74583616,103602.0
4,5,"[1615.500732421875, 475.8211669921875, 181.638...",3,MULEDEER,5,IMG_1334.JPG,74583424,103602.0


In [29]:
cleancams.camera.value_counts()

103602.0    24907
106190.0     1375
Name: camera, dtype: int64

In [32]:
cleancams.to_csv("sev_ann_cam.csv", sep=',')

In [35]:
newpath = r"/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Projects/CameraTraps/Labels/sevilleta_clean.csv"
clean = pd.read_csv(newpath)
clean = clean.drop('Unnamed: 8', axis=1)
clean.head()

Unnamed: 0,annotation_id,bbox,category_id,class,imageID,filename,subjectID,camera
0,1,"[735.9075317382812, 462.56317138671875, 101.02...",1,MULEDEER,1,IMG_1203.JPG,74583293,Nuun2
1,3,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,3,IMG_0606.JPG,74582696,Nuun2
2,4,"[1517.202392578125, 336.9212951660156, 224.376...",3,AFRICANORYX,4,IMG_1526.JPG,74583616,Nuun2
3,5,"[1615.500732421875, 475.8211669921875, 181.638...",3,MULEDEER,5,IMG_1334.JPG,74583424,Nuun2
4,6,"[10.168608665466309, 115.3065185546875, 816.50...",3,MULEDEER,6,IMG_0087.JPG,78164412,106190


In [36]:
clean.to_csv("clean_sevilleta.csv", sep=',')

In [31]:
with open("sev_annos_cams.json", "w") as outfile:
    json.dump(cleancams, outfile)

TypeError: Object of type DataFrame is not JSON serializable