### In this module we will run YOLO on our test set and analyze its outputs

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from ultralytics import YOLO
import torch
import migKeyPoint.utils.YAMLtools as yt

In [None]:
conf = yt.load_configuration('../master_configuration.yaml')['yoloConf']

In [None]:
conf

### Load test set

**In this document conf['project_dir'] will be the parent directory for most things**

In [None]:
# Load the test set; the resulting dataframe is called df from here on
test_feather = conf['project_dir']+"/data/test%s.feather"%(conf['suffix'])
df = pd.read_feather(test_feather)

# Now let's evaluate YOLO on the test images

After training, YOLO creates a directory called *runs/*. You'll need to navigate through *runs/* and find the
model you want to use. In the code below, *train/* is the run directory that is used when no pretrained model path is configured. The *weights/* subdirectory contains two files, one called 'best' and the other called 'last'. 'best' holds the weights saved at the best-performing epoch of training, so it's typically the file to use.

In [None]:
# NOTE: you may need to set the weights path manually. If you run several
# trainings in the same project, ultralytics appends a version number to the
# run directory that holds the weights (for instance train2/, train3/,
# train50/, etc).

from PIL import Image

#Directory of the test PNG images we created is determined by conf['suffix']

testpath = conf['project_dir']+'/datasets/test%s/images/'%(conf['suffix'])

#Create a list of PIL images, ordered by sorted file name, to pass into YOLO
images = [Image.open(testpath+val) for val in sorted(os.listdir(testpath))]
print('Performing YOLO inference\n')

#Load trained YOLO model weights. Where we look depends on whether a
#pretrained model path is configured.
pretrained_path = conf['pretrained_model_path']
if pretrained_path:  # truthiness also covers a None entry, not only ''
    # First guess: the configured path is a directory that contains best.pt
    weights_file = pretrained_path+'/best.pt'
    try:
        model = YOLO(weights_file)
    except Exception:
        # `except Exception` rather than a bare `except` so that an interrupt
        # (Ctrl-C / kernel stop) is not mistaken for a missing weights file
        print(f"Weights file not found at {weights_file} trying {pretrained_path}")
        # Second guess: the configured path is itself the weights file
        weights_file = pretrained_path
        try:
            model = YOLO(weights_file)
        except Exception as err:
            # Chain the underlying error so the real cause stays visible
            raise OSError(f"Couldn't find a model at {weights_file}. Did you mean to use a pretrained_model path?") from err
    print(f"Found weights file at {weights_file}")
else:
    # NOTE(review): this branch reads conf['project'] while the data paths
    # above read conf['project_dir'] -- confirm that both keys are intended.
    weights_file = conf['project']+'/train/weights/best.pt'
    model = YOLO(weights_file)

#Perform YOLO inference on all images in a single batch
results = model.predict(images,batch = len(images),verbose = False, workers = 1,imgsz=512,rect=True)
print("DONE")

### Let's take a closer look at the results file before analyzing

In [None]:
#The length of the results list should always be the same as the test dataframe
print(len(results),len(df))

#Looking through the results list, each entry has a boxes and keypoints object
results

In [None]:
#Now lets look at boxes and keypoints for the first image
print(results[0].boxes)

In [None]:
#The above shows several options for boxes. The xyxyn format gives [xmin,ymin,xmax,ymax] of the bounding box
#normalized to 1. This is what we should use so we can upscale the box to the 2048 x 1152 image

results[0].boxes.xyxyn.cpu().numpy()

In [None]:
#A frame can come back with more than one bounding box. Print the index of
#every frame where that happened; any additional bounding box beyond the first
#would be a false positive.

multi_box_frames = [idx for idx, res in enumerate(results) if len(res.boxes.xyxyn) > 1]
for idx in multi_box_frames:
    print(idx)

### Now lets take a look at the keypoints

In [None]:
results[0].keypoints

In [None]:
#The .xyn field contains the normalized keypoint coordinates, which we use so
#they can later be scaled up to the camera resolution.
#Quick check: print the index of every frame in which a detection does not
#carry exactly conf['maxNumKeyPoints'] keypoints.

for i,res in enumerate(results):
    # Iterate over every detection instead of indexing xyn[0]: a frame with no
    # detections has nothing at index 0, so indexing would raise an IndexError.
    if any(len(kpts) != conf['maxNumKeyPoints'] for kpts in res.keypoints.xyn):
        print(i)

# Lets aggregate our keypoint and bounding box predictions into a dataframe so we can quantitatively compare them with truth

In [None]:
### The columns of interest are 'class_index', 'xBB', 'yBB', 'width',
### 'height', 'pkx', 'pky' for k in 0, ..., maxNumKeyPoints-1.
### The x-like columns are multiplied by conf['cameraX'] and the y-like columns
### by conf['cameraY'] to convert them back to the aspect ratio of the camera.
### NOTE: the scaling is applied in place, so running this cell twice scales twice.

keypoint_ids = range(0,conf['maxNumKeyPoints'])
xcols = ['xBB', 'width'] + ['p%sx'%(k) for k in keypoint_ids]
ycols = ['yBB', 'height'] + ['p%sy'%(k) for k in keypoint_ids]

for cols, factor_key in ((xcols, 'cameraX'), (ycols, 'cameraY')):
    for col in cols:
        df[col] = df[col]*conf[factor_key]

### Our bounding box dimensions are [xmin,xmax,ymin,ymax] so lets make columns for these in our test dataframe too

In [None]:
# Bounding box edges: center -/+ half the size, rounded to whole pixels
half_width = df['width']/2
half_height = df['height']/2

df['xmin'] = np.round(df['xBB']-half_width).astype('int')
df['xmax'] = np.round(df['xBB']+half_width).astype('int')

df['ymin'] = np.round(df['yBB']-half_height).astype('int')
df['ymax'] = np.round(df['yBB']+half_height).astype('int')

### Now let's compile the YOLO results

In [None]:
# Results are compiled into track-indexed data, and the frame index is stored
# alongside so rows can be linked back to data indexed by image frame. With one
# track per frame (as in this example) the two indexings correspond one-to-one.
# That will not be the case in general; the loop below handles the general case.

yolo = pd.DataFrame() #YOLO results dataframe
frameIndex = []
xmin, xmax = [], []
ymin, ymax = [], []
pred = [] #class prediction, should be 0 (ER) for each event in this sample
prob = [] #class prediction confidence score between 0 and 1. 1 is most confident, 0 means the model has no idea

#One (initially empty) list per keypoint index
coords = {k: [] for k in range(0,conf['maxNumKeyPoints'])}

for frame_no, result in enumerate(results):
    frame_boxes = result.boxes.xyxyn.cpu().numpy() #normalized boxes, iterated one box at a time below
    frame_points = result.keypoints.xyn.cpu().numpy() #normalized keypoints, iterated one keypoint set at a time below
    frame_data = result.boxes.data.cpu().numpy() #to get class prediction and class confidence score
    #Walk through all boxes and sets of key points found in this frame
    for det, bbox, kpts in zip(frame_data,frame_boxes,frame_points):
        frameIndex.append(frame_no)
        #Box edges converted from normalized units to camera pixels
        xmin.append(bbox[0]*conf['cameraX'])
        ymin.append(bbox[1]*conf['cameraY'])
        xmax.append(bbox[2]*conf['cameraX'])
        ymax.append(bbox[3]*conf['cameraY'])
        prob.append(det[4])
        pred.append(det[5])
        #Store each key point in its own coords[k] list; these are still normalized
        for k, xy in enumerate(kpts):
            coords[k].append(xy)

In [None]:
# Populate the yolo results dataframe, one column per collected list
scalar_columns = (
    ('frame', frameIndex), #frame number
    ('xmin', xmin),
    ('xmax', xmax),
    ('ymin', ymin),
    ('ymax', ymax),
    ('pred', pred),
    ('prob', prob),
)
for name, values in scalar_columns:
    yolo[name] = values

# One column per keypoint index, added after the scalar columns
for k in range(0,conf['maxNumKeyPoints']):
    yolo['p%s'%(k)] = coords[k]

In [None]:
# Split each keypoint column pj (one (x, y) pair per row) into pjx and pjy and
# convert the normalized coordinates to the resolution of the camera.
# Columns are addressed by name rather than as "the last N columns", so the
# cell does not depend on column position and can safely be run again.
for k in range(0,conf['maxNumKeyPoints']):
    col = 'p%s'%(k)
    if col not in yolo.columns:
        # Already split by an earlier run of this cell; nothing left to do
        continue
    # reshape(-1, 2) keeps this well defined even when yolo has no rows
    xy = np.array(yolo[col].tolist()).reshape(-1, 2)
    yolo[f'{col}x'] = xy[:, 0]*conf['cameraX']
    yolo[f'{col}y'] = xy[:, 1]*conf['cameraY']
    # Drop the original paired column
    yolo.drop(columns=[col], inplace=True)

### Let's compare YOLO's output to our original test set

In [None]:
#Test set (truth)
len(df)

In [None]:
#YOLO predictions (one row per predicted track)
len(yolo)

In [None]:
# Comparing the two lengths above tells us whether YOLO predicted more tracks
# than there actually are; any surplus needs to be handled properly. On real
# data we won't know how many tracks there actually are per frame, so a good way
# to handle this is to group YOLO's output by frame number (this is the index of
# our truth test set) and then aggregate the contents of YOLO's output.

by_frame = yolo.groupby('frame')
grp = by_frame.agg(list).reset_index()
grp

In [None]:
# grp is the output of grouping YOLO's output by frame and then aggregating.
# The difference between grp and yolo is that every entry is now a list, with
# one row per frame that received at least one prediction. It's important to
# note that if we had any false negatives, grp would be shorter than our truth
# test set (df), because each frame in the test set has one track.

#Now let's check the frames with false positives

# Flag every frame whose list of predictions holds more than one entry, i.e.
# frames predicted with two or more tracks when we know each frame actually had
# one. Selecting ['prob'] lets us inspect the confidence of each such track.
multi_track = grp['pred'].apply(lambda preds: len(preds) > 1)
grp[multi_track]['prob']

In [None]:
'''Here are the confidences of all tracks. The two track events have lower confidences in their predictions
than average'''
plt.hist(yolo['prob'],bins=51);

### Let's plot an example output

In [None]:
#Assumes test dataframe is called df and the frame-grouped yolo results are called grp
def plot_output(i,zoom): #zoom zooms to truth frame
    """Plot test frame i with truth and YOLO-predicted boxes and keypoints.

    Parameters
    ----------
    i : int
        Positional index of the frame in df; also matched against grp['frame'].
    zoom : bool
        If true, limit the axes to the truth bounding box padded by 15 on
        every side.

    The truth bounding box is drawn in white and predicted boxes in cyan.
    Keypoint 0 is drawn in black (truth) and magenta (predicted); all other
    keypoints are drawn in white (truth) and cyan (predicted).
    """
    tmp = df.iloc[i]
    tmpyolo = grp.query('frame == %s'%(i))

    def _image(weight_col):
        # 2D histogram of the hit positions weighted by the given column
        # NOTE(review): 2048 x 1152 is hard-coded here while the coordinates
        # are scaled with conf['cameraX'] / conf['cameraY'] -- confirm they agree.
        return np.histogram2d(tmp['x'],tmp['y'],weights=tmp[weight_col],bins=(2048,1152),range=((0,2048),(0,1152)))[0].T

    # Use 'q' as the weights when that works, otherwise fall back to
    # 'vignetted_q'. `except Exception` keeps the fallback but, unlike a bare
    # `except`, lets an interrupt (Ctrl-C / kernel stop) through.
    try:
        im = _image('q')
    except Exception:
        im = _image('vignetted_q')
    plt.imshow(im,cmap='viridis')

    #Truth bounding box in white
    plt.hlines(tmp['ymin'],tmp['xmin'],tmp['xmax'],color='w',lw=2)
    plt.hlines(tmp['ymax'],tmp['xmin'],tmp['xmax'],color='w',lw=2)
    plt.vlines(tmp['xmin'],tmp['ymin'],tmp['ymax'],color='w',lw=2)
    plt.vlines(tmp['xmax'],tmp['ymin'],tmp['ymax'],color='w',lw=2)

    for xmin,xmax,ymin,ymax in zip(tmpyolo['xmin'],tmpyolo['xmax'],tmpyolo['ymin'],tmpyolo['ymax']):
        #Predicted bounding boxes in cyan
        plt.hlines(ymin,xmin,xmax,color='cyan',lw=2)
        plt.hlines(ymax,xmin,xmax,color='cyan',lw=2)
        plt.vlines(xmin,ymin,ymax,color='cyan',lw=2)
        plt.vlines(xmax,ymin,ymax,color='cyan',lw=2)

    if zoom:
        plt.xlim(tmp['xmin']-15,tmp['xmax']+15)
        plt.ylim(tmp['ymin']-15,tmp['ymax']+15)

    #Keypoint 0: truth in black, predicted in magenta.
    #Other keypoints: truth in white, predicted in cyan.
    #The loop index is named k so that it does not shadow the frame index i.
    for k in range(0,conf['maxNumKeyPoints']):
        truth_color, pred_color = ('k','magenta') if k == 0 else ('w','cyan')
        plt.plot(tmp['p%sx'%(k)],tmp['p%sy'%(k)],'o',color=truth_color)
        for x,y in zip(tmpyolo['p%sx'%(k)],tmpyolo['p%sy'%(k)]):
            plt.plot(x,y,'o',color=pred_color)

    plt.show()

In [None]:
'''Recall when assessing the results that our model was trained on noisy images. The images shown here
are the truth ERs without noise (I didn't save the noise in the processing)'''

plot_output(18,zoom = True)

In [None]:
'''Here"s one with two tracks'''
plot_output(73,zoom = True)

# Let's quantify our performance

We'll start with bounding box quantification. We use the IOU which stands for "intersection over union" metric. This metric is the ratio of the areas of the intersection to union of the truth and predicted bounding boxes. Perfect overlap is 1, no overlap is 0

![](../../figures/IOU_def.png)

In [None]:
def bb_intersection_over_union(xmin1, xmax1, ymin1, ymax1, xmin2, xmax2, ymin2, ymax2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Box 1 is (xmin1, xmax1, ymin1, ymax1) and box 2 is
    (xmin2, xmax2, ymin2, ymax2). Every extent is measured as max - min + 1.
    """
    # Overlap extent along each axis, clipped at 0 when the boxes do not overlap
    overlap_w = max(0, min(xmax1, xmax2) - max(xmin1, xmin2) + 1)
    overlap_h = max(0, min(ymax1, ymax2) - max(ymin1, ymin2) + 1)
    intersection = overlap_w * overlap_h

    # Areas of the two individual boxes
    area1 = (xmax1 - xmin1 + 1) * (ymax1 - ymin1 + 1)
    area2 = (xmax2 - xmin2 + 1) * (ymax2 - ymin2 + 1)

    # Union = sum of both areas minus the intersection, so the shared region
    # is not counted twice
    union = area1 + area2 - intersection

    return intersection / union

In [None]:
# One IOU per truth frame; -1 marks frames that do not have exactly one
# predicted box, for which a single truth/prediction comparison is not made
IOUs = []
for frame_no in range(0,len(df)):
    truth_row = df.iloc[frame_no]
    matches = yolo.query('frame == %s'%(frame_no))
    if len(matches) == 1:
        det = matches.iloc[0]
        IOUs.append(bb_intersection_over_union(truth_row['xmin'], truth_row['xmax'], truth_row['ymin'], truth_row['ymax'],
                                               det['xmin'], det['xmax'], det['ymin'], det['ymax']))
    else:
        IOUs.append(-1)

In [None]:
# Per-frame metrics table, starting with the IOU column
metrics = pd.DataFrame({'IOU': IOUs})

In [None]:
# Let's visualize our IOU scores

plt.hist(metrics['IOU'],range=(0,1))

In [None]:
### Rebuild grp: YOLO's output grouped by frame, with every column aggregated into a list.
### NOTE(review): the stated intent was to add IOU to grp, but the IOU values are stored in `metrics`, not in
### `yolo`, so the regrouped frame carries the same columns as before -- confirm whether IOU should be merged in.
### In an analysis with much larger datasets you want to be more careful about when to do large operations,
### but here we have very small datasets, so it's okay

grp = yolo.groupby('frame').agg(list).reset_index()

In [None]:
# Generally speaking our IOUs look great. Let's see what the best and worst overlapping events look like.
# Frames without exactly one predicted box carry the placeholder IOU of -1. Leave them out when
# looking for the worst frame, so that it is a genuine overlap score rather than the placeholder.

scoredIOU = metrics.query('IOU >= 0')['IOU']
worstIOUidx = scoredIOU.nsmallest(1).index.to_numpy()[0]
bestIOUidx = metrics['IOU'].nlargest(1).index.to_numpy()[0]

In [None]:
plot_output(worstIOUidx,zoom = True)

In [None]:
plot_output(bestIOUidx,zoom = True)

# Next lets assess our keypoint detection performance
Coming up with a good metric to holistically assess keypoint assignments is important, and there's a good deal of flexibility in how to come up with one. Generally speaking, with ML metrics, we want something that ranges from 0 to 1 with 0 being maximally bad and 1 being maximally good. The computer vision community often uses a metric called [object keypoint similarity](https://learnopencv.com/object-keypoint-similarity/), which satisfies these criteria. We'll use a simplified version of object keypoint similarity and begin by defining the keypoint similarity of point $i$ as
$$
KS_{i} = \exp\left(\frac{-d_i^2}{2s^2k^2}\right),
$$

where $d_i$ is the Euclidean distance between the truth and predicted positions of keypoint $i$, $s$ is a scale parameter defined as the area of the truth bounding box, and $k$ is an empirically determined constant. For an image we'll compute the object keypoint similarity, $\mathrm{OKS}$, as
$$
\mathrm{OKS}=\frac{1}{N_\mathrm{keypoints}}\sum_{i=1}^{N_\mathrm{keypoints}}KS_i
$$

**Important, OKS is an ordered quantity. The order of the keypoints determined by YOLO is supposed to match the truth ordering. For applications where direction matters, we want OKS to be order dependent. If we don't care about direction, we can reorder the points before computing OKS in such a way that it minimizes the Euclidean distance.**

In [None]:
# Order-dependent OKS between a truth track and a measured (predicted) track
def order_dependent_OKS(truth,meas,k,num_keypoints=None): #arguments are two tracks which we can think of as rows of dataframes
    """Return the order-dependent object keypoint similarity of two tracks.

    Keypoint j of `meas` is compared with keypoint j of `truth`; each pair
    contributes exp(-d_j**2 / (2 * s**2 * k**2)) and the result is the mean
    over all keypoints.

    Parameters
    ----------
    truth : mapping (e.g. a dataframe row)
        Must provide 'pjx' and 'pjy' for every keypoint j, plus 'width' and
        'height', whose product is used as the scale s.
    meas : mapping (e.g. a dataframe row)
        Must provide 'pjx' and 'pjy' for every keypoint j.
    k : float
        Constant multiplying the scale in the exponent.
    num_keypoints : int, optional
        Number of keypoints to compare. Defaults to conf['maxNumKeyPoints'].

    Returns
    -------
    float
        Mean keypoint similarity.
    """
    if num_keypoints is None:
        num_keypoints = conf['maxNumKeyPoints']
    # Squared Euclidean distance between truth and measured keypoint j
    sq_distances = np.array([
        (meas['p%sx'%(j)]-truth['p%sx'%(j)])**2 + (meas['p%sy'%(j)]-truth['p%sy'%(j)])**2
        for j in range(0,num_keypoints)
    ])
    # This is s. It is in whatever units truth['width'] and truth['height']
    # are in when the function is called.
    scale = truth['width']*truth['height']
    OKS = np.exp(-1*sq_distances/(2*scale**2*k**2)).sum()/num_keypoints
    return OKS

In [None]:
# Compute one OKS per truth frame; -1 marks frames that do not have exactly
# one predicted track, for which no single comparison is made
OKSs = []
for frame_no in range(0,len(df)):
    truth_row = df.iloc[frame_no]
    matches = yolo.query('frame == %s'%(frame_no))
    if len(matches) == 1:
        OKSs.append(order_dependent_OKS(truth_row,matches.iloc[0],k=0.001))
    else:
        OKSs.append(-1)

In [None]:
'''Add OKS to the metrics dataframe'''
metrics['OKS'] = OKSs

In [None]:
plt.hist(metrics['OKS'],range=(0,1))

In [None]:
metrics.query('OKS > 0')['OKS'].mean()

In [None]:
# Pick example frames with poor, typical and best keypoint scores.
# Only strictly positive scores are considered for the poor example, which
# leaves out the -1 placeholder; index [1] selects the second-smallest of them.
# The "typical" examples come from a fixed score window (0.88, 0.9), so the
# array is empty when no frame scores inside that window.

valid_oks = metrics.query('OKS > 0')['OKS']
worstOKSidx = valid_oks.nsmallest(2).index.to_numpy()[1]
avgOKSidxs = metrics.query('0.9 > OKS > 0.88')['OKS'].nsmallest(10).index.to_numpy()
bestOKSidx = metrics['OKS'].nlargest(1).index.to_numpy()[0]

In [None]:
plot_output(worstOKSidx,zoom = True)

In [None]:
plot_output(avgOKSidxs[0],zoom = True)

In [None]:
plot_output(bestOKSidx,zoom = True)

# A few other things we can do:
1. Repeat this exercise for images with multiple tracks (great way to play around with more realistic data sets!)
2. Compute a head/tail OKS score only considering head and tail points. Then you can compare the OKS with the order YOLO gave versus the reverse order. This would be more interesting in a more diverse set where we get head/tail wrong sometimes
3. Assess how good the heads and tails individually were assessed
4. Repeat these notebooks with more realistic simulation that includes Noise (Done)
5. Play around with estimating track angles based on keypoint trajectories and comparing to truth

### We now have a starting point for labeling data. We can use our pretrained model and test how it works when labeling data in Label Studio