Analysis of Consensus of image labels generated by USFWS Biologists from Bosque del Apache and Maxwell NWRs
Rowan Converse
Start Date: 2023 - 03 - 07

In [1]:
#Imports
import pandas as pd
import numpy as np
import ast

import sklearn.metrics

from shapely.geometry import Polygon,Point
import matplotlib.pyplot as plt
import shapely
import cv2 as cv
import os
import gc

In [3]:
#Data Loading

# Original annotations: read the CSV, drop the one excluded image, and turn the
# stringified bbox lists into real Python lists.
orig_path = "/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/CV4Ecology/Prototyping/Data/Labels/coco/labelbox.csv"
with open(orig_path) as f:
    originals = pd.read_csv(f)
originals = originals[originals["filename"] != "BDA_24C_20181107_1.JPG"]
originals['bbox'] = originals['bbox'].apply(ast.literal_eval)

# Analysis annotations: same image exclusion. The bbox_orig / bbox_refined
# columns are deliberately left as strings here; eval_bbox() parses them later.
path = "/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Dissertation/1_Chapter/consensus/data/expert/20230307_expertanalysislabels_spponly.csv"
with open(path) as f:
    df = pd.read_csv(f)
df = df[df["filename"] != "BDA_24C_20181107_1.JPG"]

# Consensus annotations: bbox strings are parsed into lists straight away.
refinedpath = "/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Dissertation/1_Chapter/consensus/data/expert/expertconsensus_superclass.csv"
with open(refinedpath) as f:
    ref = pd.read_csv(f)
ref['bbox'] = ref['bbox'].apply(ast.literal_eval)


In [9]:
ref.head()

Unnamed: 0,label_id,filename,bbox,class_id
0,0,BDA_12C_20181127_1.JPG,"[4445.5, 2719.5, 95.0, 80.5]",2
1,1,BDA_12C_20181127_1.JPG,"[4312.5, 2739.5, 98.0, 44.0]",2
2,2,BDA_12C_20181127_1.JPG,"[3725.5, 1779.0, 73.5, 70.5]",2
3,3,BDA_12C_20181127_1.JPG,"[3628.0, 1882.0, 92.0, 38.0]",2
4,4,BDA_12C_20181127_1.JPG,"[3679.0, 1929.0, 65.0, 82.0]",2


In [10]:
def calc_area(row):
    """Return the area (width * height) of the box stored in row['bbox'].

    row['bbox'] must unpack into exactly four values; the first two
    (the box origin) are ignored.
    """
    _, _, width, height = row['bbox']
    return width * height

ref['area'] = ref.apply(calc_area, axis=1)

In [12]:
ref.groupby("class_id")["area"].mean()

class_id
1    16238.500000
2     3719.692857
3     2598.333601
Name: area, dtype: float64

In [3]:
#Calculating IOU for each individual box in the dataset to make a per-label score
from shapely.geometry import box

def eval_bbox(row, col_name):
    """Parse the stringified box in row[col_name] into a float array.

    A null cell yields an array of four NaNs so callers can detect
    the missing box without special-casing None.
    """
    raw = row[col_name]
    if pd.isnull(raw):
        return np.array([np.nan] * 4)
    return np.array(ast.literal_eval(raw)).astype(float)


# Define a function to calculate the IOU only if both bounding boxes are non-null
def calculate_iou(row):
    """Intersection-over-union of a row's original and refined boxes.

    Both boxes are read with eval_bbox() from the 'bbox_orig' and
    'bbox_refined' columns and are used as [x, y, width, height].

    Returns:
        The IoU as a float, or None when either box is missing (contains NaN)
        or when the two boxes have no area at all, in which case the ratio is
        undefined and would otherwise raise ZeroDivisionError.
    """
    bbox_orig = eval_bbox(row, 'bbox_orig')
    bbox_ref = eval_bbox(row, 'bbox_refined')
    if np.isnan(bbox_orig).any() or np.isnan(bbox_ref).any():
        return None

    # shapely's box() takes corner coordinates, so convert from x/y/width/height.
    orig_poly = box(bbox_orig[0], bbox_orig[1], bbox_orig[0] + bbox_orig[2], bbox_orig[1] + bbox_orig[3])
    ref_poly = box(bbox_ref[0], bbox_ref[1], bbox_ref[0] + bbox_ref[2], bbox_ref[1] + bbox_ref[3])

    union_area = orig_poly.union(ref_poly).area
    if union_area == 0:
        # Two degenerate (zero-area) boxes: IoU is undefined.
        return None
    return orig_poly.intersection(ref_poly).area / union_area

# Apply the function to each row of the DataFrame and save the results in a new column
df['IOU'] = df.apply(calculate_iou, axis=1)
df.head()

Unnamed: 0.1,Unnamed: 0,annotation_ID,bbox_orig,filename,labeler,cat_orig,cluster_id,cat_refined,bbox_refined,IOU
0,0,1,"[4428, 2707, 125, 103]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,0,Canadian Goose,"[4445.5, 2719.5, 95.0, 80.5]",0.593981
1,1,2,"[4308, 2731, 105, 67]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,1,Canadian Goose,"[4312.5, 2739.5, 98.0, 44.0]",0.612935
2,2,3,"[3707, 1761, 110, 101]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,2,Canadian Goose,"[3725.5, 1779.0, 73.5, 70.5]",0.466404
3,3,4,"[3628, 1882, 90, 38]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,3,Canadian Goose,"[3628.0, 1882.0, 92.0, 38.0]",0.978261
4,4,5,"[3669, 1927, 69, 82]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,4,Canadian Goose,"[3679.0, 1929.0, 65.0, 82.0]",0.753031


In [8]:
df["IOU"].mean()

0.6380874298901835

In [13]:
df["cat_orig"].unique()

array(['Canadian Goose', 'Sandhill Crane', 'Mallard', 'Northern Pintail',
       'Northern Shoveler', 'Teal', 'Gadwall', 'Ringneck',
       'American Wigeon', 'Other', 'Ruddy', 'Readhead', 'Snow Goose'],
      dtype=object)

In [14]:
#map superclasses
# Species label -> morphological superclass (Goose / Crane / Duck / Other).
# The keys are matched literally against df["cat_orig"], so spellings such as
# 'Readhead' and 'Canadian Goose' must stay exactly as they appear in the data.
mapping = {'Canadian Goose': 'Goose',
           'Sandhill Crane': 'Crane',
           'Mallard': 'Duck',
           'Northern Pintail': 'Duck',
           'American Wigeon': 'Duck',
           'Ringneck': 'Duck',
           "Ruddy": 'Duck',
           "Readhead": "Duck",
           "Snow Goose": "Goose",
           'Other': 'Other',
           'Teal': 'Duck',
           'Gadwall': 'Duck',
           'Northern Shoveler': 'Duck'}
# Series.map leaves NaN for any label that is not a key of `mapping`.
df["orig_superclass"] = df["cat_orig"].map(mapping)
df.head()

Unnamed: 0.1,Unnamed: 0,annotation_ID,bbox_orig,filename,labeler,cat_orig,cluster_id,cat_refined,bbox_refined,IOU,superclass,orig_superclass
0,0,1,"[4428, 2707, 125, 103]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,0,Canadian Goose,"[4445.5, 2719.5, 95.0, 80.5]",0.593981,Goose,Goose
1,1,2,"[4308, 2731, 105, 67]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,1,Canadian Goose,"[4312.5, 2739.5, 98.0, 44.0]",0.612935,Goose,Goose
2,2,3,"[3707, 1761, 110, 101]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,2,Canadian Goose,"[3725.5, 1779.0, 73.5, 70.5]",0.466404,Goose,Goose
3,3,4,"[3628, 1882, 90, 38]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,3,Canadian Goose,"[3628.0, 1882.0, 92.0, 38.0]",0.978261,Goose,Goose
4,4,5,"[3669, 1927, 69, 82]",BDA_12C_20181127_1.JPG,steven_sesnie@fws.gov,Canadian Goose,4,Canadian Goose,"[3679.0, 1929.0, 65.0, 82.0]",0.753031,Goose,Goose


In [7]:
df.groupby("cat_refined")["IOU"].mean()

cat_refined
American Wigeon      0.667285
Canadian Goose       0.616649
Gadwall              0.665799
Mallard              0.630587
Northern Pintail     0.685820
Northern Shoveler    0.641286
Other                0.621356
Sandhill Crane       0.683264
Teal                 0.693175
Name: IOU, dtype: float64

In [15]:
def pielou_evenness(class_counts):
    """Pielou's evenness J' = H' / ln(S) for a vector of per-class counts.

    H' is the Shannon entropy of the class proportions and S the number of
    classes present. With fewer than two classes ln(S) is 0 and the index is
    undefined, so NaN is returned explicitly. This is the same value the
    0/0 division used to produce, minus the RuntimeWarning.
    """
    counts = np.asarray(class_counts, dtype=float)
    counts = counts[counts > 0]
    if counts.size < 2:
        return np.nan
    proportions = counts / counts.sum()
    return -np.sum(proportions * np.log(proportions)) / np.log(counts.size)


# Group the dataframe by image, then by cluster
# NOTE(review): rows with cluster_id == -1 are grouped like any other cluster
# here, although another cell counts them as `missing` - confirm whether they
# should be excluded before averaging.
# NOTE(review): df['superclass'] is not created by any cell visible in this
# notebook - TODO confirm where it comes from.
grouped = df.groupby(['filename', 'cluster_id'])

# One record per (image, cluster): how evenly the labelers' original
# superclass votes are spread within the cluster.
pielou_records = []
for (filename, cluster_id), group in grouped:
    pielou_records.append({
        'cluster_id': cluster_id,
        'filename': filename,
        # Assumes all consensus IDs in the group are the same
        'consensus_class_ID': group['superclass'].iloc[0],
        'pielou_index': pielou_evenness(group.groupby('orig_superclass').size().to_numpy()),
    })

# Create a new dataframe with the results. Clusters where every labeler agreed
# carry NaN and are therefore skipped by a later .mean().
pielou = pd.DataFrame(
    pielou_records,
    columns=['cluster_id', 'filename', 'consensus_class_ID', 'pielou_index'],
)

  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness_index = -np.sum(relative_abundance * np.log(relative_abundance)) / np.log(len(relative_abundance))
  evenness

In [17]:
pielou.groupby("consensus_class_ID")["pielou_index"].mean()

consensus_class_ID
Crane    0.503258
Duck     0.626674
Goose    0.509647
Other    0.982293
Name: pielou_index, dtype: float64

In [10]:
# NOTE(review): `result_df` is not defined in any cell visible in this notebook,
# so this cell fails on a fresh kernel. Presumably it is an earlier,
# species-level version of `pielou` - TODO confirm or remove.
result_df.groupby("consensus_class_ID")["pielou_index"].mean()

consensus_class_ID
American Wigeon      0.874592
Canadian Goose       0.510691
Gadwall              0.913858
Mallard              0.684334
Northern Pintail     0.763596
Northern Shoveler    0.923912
Other                0.916291
Sandhill Crane       0.503258
Teal                 0.890206
Name: pielou_index, dtype: float64

In [9]:
df.groupby(["labeler"])["bbox_orig"].count()

labeler
andrew_stetter@fws.gov             1145
barry_wilson@fws.gov               2315
bill_johnson@fws.gov               1983
dan_collins@fws.gov                1395
david.butler@tpwd.texas.gov        2747
jeff_sanchez@fws.gov               1791
jena_moon@fws.gov                  1076
john_vradenburg@fws.gov            1426
josh_vest@fws.gov                   508
jude_smith@fws.gov                 1844
kammie_kruse@fws.gov                960
mbrasher@ducks.org                  485
ronald_deroche@fws.gov              485
stephen.mcdowell@tpwd.texas.gov    1096
steven_sesnie@fws.gov                68
Name: bbox_orig, dtype: int64

In [10]:
# NOTE(review): the ref.head() output earlier in this notebook shows no
# `category` column (only label_id, filename, bbox, class_id), so this cell
# presumably ran against a different consensus file - TODO confirm.
ref.groupby(["category"])["bbox"].count()

category
American Wigeon                                                         22
Canadian Goose                                                         140
Gadwall                                                                  5
Mallard                                                               1688
Northern Pintail                                                       262
Northern Shoveler                                                        2
Other                                                                   70
Sandhill Crane                                                          52
Teal                                                                     2
['American Wigeon' 'Gadwall' 'Mallard' 'Northern Pintail' 'Other']       1
['American Wigeon' 'Gadwall' 'Mallard' 'Other' 'Teal']                   1
['American Wigeon' 'Gadwall' 'Teal']                                     2
['American Wigeon' 'Mallard' 'Northern Pintail' 'Other' 'Teal']          9
['American Wigeo

In [19]:
spplist = list(originals["category"].unique())
spplist

['Canadian Goose',
 'Sandhill Crane',
 'Mallard',
 'Northern Pintail',
 'Northern Shoveler',
 'Teal',
 'American Wigeon',
 'Gadwall',
 'Ringneck',
 'Ruddy',
 'Readhead',
 'Other',
 'Snow Goose']

In [20]:
# Species -> superclass lookup, declared per superclass and then inverted.
# Species spellings (e.g. 'Readhead') must match the labels in the data exactly.
_superclass_members = {
    "Goose": ['Canadian Goose', 'Snow Goose'],
    "Crane": ['Sandhill Crane'],
    "Duck": ['Mallard', 'Northern Pintail', 'Northern Shoveler', 'Teal',
             'American Wigeon', 'Gadwall', 'Ringneck', 'Ruddy', 'Readhead'],
    "Other": ['Other'],
}
class_dict = {species: superclass
              for superclass, members in _superclass_members.items()
              for species in members}

In [23]:
df["class_refined"] = df["cat_refined"].map(class_dict)

In [34]:
# Superclass-level agreement between each original label and the consensus label.
# `class_orig` is not created by any other cell, so derive it here (when absent)
# from the original species label; without it this cell raises KeyError on a
# fresh kernel.
if 'class_orig' not in df.columns:
    df['class_orig'] = df['cat_orig'].map(class_dict)
df['agree'] = 'No'
df.loc[df['class_orig'] == df["class_refined"], 'agree'] = 'Yes'


labeler                          class_refined  agree
andrew_stetter@fws.gov           Crane          Yes       43
                                 Duck           Yes      840
                                 Goose          Yes      100
                                 Other          No        57
                                                Yes        1
                                                        ... 
stephen.mcdowell@tpwd.texas.gov  Other          Yes        3
                                                No         2
steven_sesnie@fws.gov            Crane          Yes        9
                                 Duck           Yes       14
                                 Goose          Yes       43
Name: agree, Length: 82, dtype: int64

In [43]:
agreement = df.groupby(["labeler","class_refined"])["agree"].value_counts()

In [50]:
savepath = "/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Dissertation/1_Chapter/consensus/data/expert/"
#exportanalysis = datetime.now().strftime('%Y%m%d_zooniverseanalysislabels_seagull_nodrops.csv')
agreement.to_csv(savepath+"agreementsuperclass.csv")

In [8]:
missing = df[df["cluster_id"] == -1]
len(missing)

1238

In [10]:
# Per-image clustering sanity check: compare the number of distinct clusters
# with the number of distinct boxes divided by the number of labelers.
# Columns are selected with a list: selecting several groupby columns with a
# bare tuple is deprecated and fails on pandas >= 2.0.
# NOTE(review): cluster_id == -1 is counted as one of the distinct clusters here.
clusters = df.groupby("filename")[["bbox_orig", "cluster_id", "labeler"]].nunique()
c = clusters.reset_index()
c["expected"] = c["bbox_orig"]/c["labeler"]
# Despite the name, "diff" is a ratio: 1.0 means as many clusters as expected.
c["diff"] = c["cluster_id"]/c["expected"]
# Without a path, to_csv() only returns the CSV text; nothing is written to disk.
clusters.to_csv()
#pd.eval("c['diff'] > 1.1 or c['diff'] < 0.9").sum()

  clusters = df.groupby("filename")["bbox_orig","cluster_id","labeler"].nunique()


Unnamed: 0_level_0,bbox_orig,cluster_id,labeler
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BDA_12C_20181127_1.JPG,841,86,10
BDA_12C_20181127_2.JPG,3817,489,8
BDA_12C_20181127_3.JPG,4867,793,6
BDA_18A4_20181106_1.JPG,1622,164,10
BDA_18A4_20181106_2.JPG,3497,356,10
BDA_18A4_20181106_3.JPG,438,63,7
BDA_18A4_20181106_4.JPG,1145,116,10
BDA_18A4_20181107_1.JPG,780,79,10
BDA_18A4_20181107_2.JPG,201,27,7
BDA_18A4_20181107_3.JPG,895,90,10


In [37]:
savepath = "/Users/rowanconverse/Library/CloudStorage/OneDrive-UniversityofNewMexico/Dissertation/1_Chapter/consensus/data/expert/"
#exportanalysis = datetime.now().strftime('%Y%m%d_zooniverseanalysislabels_seagull_nodrops.csv')
# Write into the export directory. `path` is the analysis-labels CSV file, so
# concatenating with it produced a file named "...spponly.csvclustercheck.csv".
clusters.to_csv(savepath+"clustercheck.csv")

In [12]:
def minmax(val_list):
    """Return (smallest, largest) of the values in val_list as a tuple."""
    return (min(val_list), max(val_list))

print(minmax(c["diff"]))
print(c["diff"].std())
print(c["diff"].var())

(0.9402985074626865, 1.0248886560125754)
0.02409051426821127
0.0005803528777068907


In [8]:
len(df["cat_orig"])

19324

In [16]:
# Species-level confusion between each labeler's original call (treated as the
# prediction) and the consensus label (treated as ground truth).
import sklearn.metrics
#ground_truth = ["cat", "cat", "others", "cat", "others", "others","cat", "cat", "cat", "others" ]
#prediction = ["cat", "others", "cat", "cat", "others", "others","cat", "others", "others", "cat"]
# astype(str) turns any missing label into the literal string 'nan', which then
# behaves as a class of its own - NOTE(review): confirm that is intended.
pred = df["cat_orig"].astype(str)
ground = df["cat_refined"].astype(str)

#confusion_matrix = sklearn.metrics.multilabel_confusion_matrix(ground, pred)
# Per the scikit-learn docs this returns one 2x2 (one-vs-rest) matrix per class,
# not a single class-by-class confusion matrix.
confusion_matrices = sklearn.metrics.multilabel_confusion_matrix(ground, pred)
#for confusion_matrix in confusion_matrices:
##    disp = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix, display_labels=ground)
#    disp.plot(include_values=True, cmap="viridis", ax=None, xticks_rotation="vertical")
#    plt.show()
#accuracy = sklearn.metrics.accuracy_score(ground, pred) 
#precision = sklearn.metrics.precision_score(ground, pred)
#recall = sklearn.metrics.recall_score(ground, pred)
#F1_score = sklearn.metrics.f1_score(ground, pred) 
#print({"Accuracy":accuracy,"Precision":precision,"Sensitivity_recall": recall, "Specificity": precision,"F1_score": F1_score})

#cm_display = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix)
#cm_display.plot()
#plt.show() 

In [12]:
# Species-level agreement: 'Yes' where a labeler's original species call equals
# the consensus species for the cluster, 'No' otherwise. This overwrites any
# existing `agree` column. The result is tallied per labeler.
df['agree'] = np.where(df['cat_orig'] == df["cat_refined"], 'Yes', 'No')
df.groupby("labeler")["agree"].value_counts()

labeler                          agree
andrew_stetter@fws.gov           Yes       949
                                 No        196
barry_wilson@fws.gov             Yes      1669
                                 No        646
bill_johnson@fws.gov             Yes      1497
                                 No        486
dan_collins@fws.gov              Yes      1069
                                 No        326
david.butler@tpwd.texas.gov      Yes      1873
                                 No        874
jeff_sanchez@fws.gov             Yes      1460
                                 No        331
jena_moon@fws.gov                No        590
                                 Yes       486
john_vradenburg@fws.gov          Yes      1205
                                 No        221
josh_vest@fws.gov                Yes       497
                                 No         11
jude_smith@fws.gov               No       1072
                                 Yes       772
kammie_kruse@fws.gov 

In [13]:
df.groupby(["cat_refined"])["agree"].value_counts()

cat_refined        agree
American Wigeon    Yes         74
                   No          57
Canadian Goose     Yes       1119
                   No          74
Gadwall            No          22
                   Yes         17
Mallard            Yes      10745
                   No        2179
Northern Pintail   Yes       1643
                   No         366
Northern Shoveler  No           7
                   Yes          6
Other              No         263
                   Yes        235
Sandhill Crane     Yes        441
                   No           1
Teal               Yes          7
                   No           6
Name: agree, dtype: int64

In [15]:
# Superclass-level agreement per labeler, stored under `duck_agree`.
# `class_orig` is not created by any other cell, so derive it here (when absent)
# from the original species label; without it this cell raises KeyError on a
# fresh kernel.
if 'class_orig' not in df.columns:
    df['class_orig'] = df['cat_orig'].map(class_dict)
df['duck_agree'] = 'No'
df.loc[df['class_orig'] == df["class_refined"], 'duck_agree'] = 'Yes'
df.groupby("labeler")["duck_agree"].value_counts()

19324

In [40]:
# Cluster reliability: count the labels each labeler drew on each image, then
# report the (min, max) of those counts per image.
# The counts are NOT stored under the name `range`: rebinding the builtin broke
# every later `range(...)` call in this notebook.
labels_per_labeler = df.groupby(['filename', 'labeler']).size()
r = labels_per_labeler.reset_index(name='count')

r.groupby("filename")["count"].apply(minmax)

filename
BDA_12C_20181127_1.JPG        (68, 93)
BDA_12C_20181127_2.JPG      (288, 570)
BDA_12C_20181127_3.JPG     (220, 1043)
BDA_18A4_20181106_1.JPG     (159, 165)
BDA_18A4_20181106_2.JPG     (300, 367)
BDA_18A4_20181106_3.JPG       (62, 66)
BDA_18A4_20181106_4.JPG     (113, 117)
BDA_18A4_20181107_1.JPG       (75, 83)
BDA_18A4_20181107_2.JPG       (26, 36)
BDA_18A4_20181107_3.JPG       (88, 91)
BDA_18A4_20181107_4.JPG       (83, 86)
mxw_L13_20181215_1.JPG        (38, 39)
Name: count, dtype: object

In [57]:
#Set supercategories for taxa, export to separate CSV
#spplist = ['Canadian Goose', 'Sandhill Crane', 'Mallard','Northern Pintail','American Wigeon','Other','Teal','Gadwall','Northern Shoveler']
# Every row starts as 'Duck'; only the two categories below are overridden.
# NOTE(review): this means 'Other' (and any category that is not exactly one of
# the two strings below) is also labelled 'Duck', whereas class_dict maps
# 'Other' to 'Other' - confirm which is intended.
# NOTE(review): relies on a `category` column that the ref.head() output earlier
# in this notebook does not show - TODO confirm which file `ref` was loaded from.
ref['class'] = 'Duck'
ref.loc[ref['category'] == "Canadian Goose", 'class'] = 'Goose'
ref.loc[ref['category'] == "Sandhill Crane", 'class'] = 'Crane'
ref['class'].value_counts()

Duck     2183
Goose     140
Crane      52
Name: class, dtype: int64

In [23]:
#Sa'doun's confusion matrix code
def iou(ground, pred):
    """Intersection-over-union of two axis-aligned boxes.

    ground["bbox"] and pred["bbox_orig"] are each indexed as
    [x, y, width, height]. Returns 0 when the boxes do not overlap
    (touching edges count as no overlap), otherwise intersection / union.
    """
    gt_box = ground["bbox"]
    pr_box = pred["bbox_orig"]
    gx, gy, gw, gh = gt_box[0], gt_box[1], gt_box[2], gt_box[3]
    px, py, pw, ph = pr_box[0], pr_box[1], pr_box[2], pr_box[3]

    # Extent of the overlap along each axis; non-positive means disjoint.
    overlap_w = min(gx + gw, px + pw) - max(gx, px)
    overlap_h = min(gy + gh, py + ph) - max(gy, py)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection = overlap_w * overlap_h
    union = gw * gh + pw * ph - intersection
    return intersection / union

In [None]:
def class_cm(ground,pred,iou):
  """Build a class confusion matrix by matching each predicted box to the
  ground-truth box of the same image with the highest IoU.

  ground: DataFrame read through its 'filename', 'category' and 'class'
      columns and positionally through columns 1-5.
  pred:   DataFrame read through its 'class' column and positionally through
      columns 0-4 (column 0 is compared against ground['filename']).
  iou:    callable invoked with eight positional numbers (four per box).
      NOTE(review): the two-argument iou(ground, pred) defined elsewhere in
      this notebook does not fit this call - confirm which function is meant.

  Returns an array of shape (n_classes, n_classes + 2), where n_classes is
  the number of unique ground['category'] values. The last column is
  incremented for predictions whose best IoU is 0; the second-to-last column
  is filled at the end with the ground-truth rows of each class that were
  not otherwise counted.

  NOTE(review): rows and columns are indexed with int(...)/raw 'class' values,
  so 'class' is presumably an integer code in 0..n_classes-1; a string 'class'
  column would fail at int(...) - TODO confirm.
  """
  
  classes = ground['category'].unique()
  cm=np.zeros((len(classes), len(classes)+2))

  for i in range(len(pred)):
    iou_list = []
    class_list=[]
    # Ground-truth rows belonging to the same image as prediction i.
    x=ground.loc[ground['filename'] == pred.iloc[i][0]]
    for j in range(len(x)):
      # Columns 1-4 are passed as (col1, col2, col3-col1, col4-col2), which
      # looks like an x1,y1,x2,y2 -> x,y,w,h conversion - TODO confirm layout.
      iou_list.append(iou(pred.iloc[i][1],pred.iloc[i][2],pred.iloc[i][3]-pred.iloc[i][1],pred.iloc[i][4]-pred.iloc[i][2],x.iloc[j][1],x.iloc[j][2],x.iloc[j][3]-x.iloc[j][1],x.iloc[j][4]-x.iloc[j][2]))
      class_list.append(x.iloc[j][5])
    # No ground truth in this image: the prediction is not counted at all.
    if iou_list == []:
      continue
    if max(iou_list) == 0:
      # No overlap with any ground-truth box: counted in the last column.
      # NOTE(review): the row here is the PREDICTED class, whereas the matched
      # branch below uses the ground-truth class as the row - confirm.
      cm[int(pred.iloc[i]['class'])][-1]+=1
    else:
      # Any non-zero overlap counts as a match; row = class of the best-IoU
      # ground-truth box, column = predicted class.
      cm[class_list[iou_list.index(max(iou_list))]][int(pred.iloc[i]['class'])]+=1

  for i in range(len(cm)):
    # Ground-truth rows of class i minus everything already counted in row i
    # (all columns except the last).
    cm[i][-2]=len(ground.loc[ground['class'] == i])- np.sum(cm[i][:-1])
    
  return cm

In [31]:
#Confusion matrix for bounding boxes solution from: https://towardsdatascience.com/confusion-matrix-and-object-detection-f0cbcb634157

def evaluation(ground,pred,iou_value):
    """
    Count TP / FP / FN between ground-truth and predicted contours and derive
    precision, recall and F1.

    ground    = array of ground-truth contours (each one a sequence of
                vertices accepted by shapely.geometry.Polygon).
    pred      = array of predicted contours.
    iou_value = IoU threshold; a match must score strictly above it.

    A prediction is a false positive when no ground-truth contour scores above
    the threshold against it. A ground-truth contour is a true positive when at
    least one prediction scores above the threshold and a false negative
    otherwise, so tp + fn equals the number of ground-truth contours.
    (Previously, ground truths whose best match was non-zero but below the
    threshold were counted as neither.)

    Returns (tp, fp, fn, precision, recall, f1), with each ratio rounded to
    three decimals and reported as 0.0 when its denominator is zero.
    """
    truth = np.squeeze(ground)
    preds = np.squeeze(pred)

    def ious_against(reference, candidates):
        """IoU of `reference` with every candidate lying inside its search circle."""
        ref_poly = shapely.geometry.Polygon(reference)
        # Radius of the circle with the same area as the reference polygon.
        ref_radius = np.sqrt(ref_poly.area / np.pi)
        # Only candidates fully contained in a circle 500x that radius around
        # the centroid are scored.
        search_area = shapely.geometry.Point(ref_poly.centroid).buffer(ref_radius * 500)
        scores = []
        for candidate in candidates:
            cand_poly = shapely.geometry.Polygon(np.squeeze(candidate))
            if search_area.contains(cand_poly):
                scores.append(cand_poly.intersection(ref_poly).area / cand_poly.union(ref_poly).area)
        return scores

    def all_at_or_below(scores, threshold):
        """True when no score exceeds the threshold (also true for an empty list)."""
        return all(score <= threshold for score in scores)

    # Using predicted output as the reference: unmatched predictions are FPs.
    # The arrays are iterated directly rather than through index loops.
    fp = 0
    for contour in preds:
        if all_at_or_below(ious_against(contour, truth), iou_value):
            fp += 1

    # Using ground truth as the reference: matched -> TP, everything else -> FN.
    tp = 0
    fn = 0
    for contour in truth:
        if all_at_or_below(ious_against(contour, preds), iou_value):
            fn += 1
        else:
            tp += 1

    print("TP:",tp,"\t FP:",fp,"\t FN:",fn,"\t GT:",truth.shape[0])
    # Guard every ratio so an empty or completely unmatched input reports 0.0
    # instead of raising ZeroDivisionError.
    precision = round(tp/(tp+fp),3) if (tp+fp) else 0.0
    recall = round(tp/(tp+fn),3) if (tp+fn) else 0.0
    if precision+recall:
        f1 = round(2*((precision*recall)/(precision+recall)),3)
    else:
        f1 = 0.0
    print("Precision:",precision,"\t Recall:",recall, "\t F1 score:",f1)

    return tp,fp,fn,precision,recall,f1

In [None]:
#OUTPUTS

#Line graph of average identification consensus across all images (y; % agreement) by individual (x). One for expert, one for crowdsourced.  
#Bar graph of consensus (y; % agreement) by morphology (x; duck/goose/crane). Two bars per class, one for expert, one for crowdsourced
#Line graph of average count consensus across all images (y; % agreement) by individual (x). One for expert, one for crowdsourced.  
#Line graph of consensus (y; % agreement) by density (x; consensus # of individuals per image). One line for expert, one line for crowdsourced.
