In [7]:
import pickle

In [8]:
# Binary read handles for the three pickled result sets
# (ground-truth Haar faces, inferred Haar faces, improved face extraction).
f1 = open(file='true_face_haar.pickle', mode='rb')
f2 = open(file='inferred_face_haar.pickle', mode='rb')
f3 = open(file='better_face_extraction.pickle', mode='rb')

In [9]:
# Deserialize the three result sets from the already-open handles f1/f2/f3.
# Later cells index these dicts by frame number and read per-frame fields
# such as 'sec' and 'coords_list'.
#
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted run of this project.
try:
    true_dict = pickle.load(f1)
    inferred_dict = pickle.load(f2)
    better_dict = pickle.load(f3)
finally:
    # Release the file handles whether or not loading succeeded, so they do
    # not stay open for the lifetime of the kernel.
    for handle in (f1, f2, f3):
        handle.close()

## FPS

In [10]:
# Per-frame FPS for the 'true' run: reciprocal of each frame's 'sec' value,
# taken in dict order. The first frame is skipped because its timing
# includes cold-start latency.
true_list = [1 / entry['sec'] for entry in list(true_dict.values())[1:]]

In [11]:
# Per-frame FPS for the 'inferred' run: reciprocal of each frame's 'sec'
# value, taken in dict order. The first frame is skipped because its timing
# includes cold-start latency.
inferred_list = [1 / entry['sec'] for entry in list(inferred_dict.values())[1:]]

In [12]:
# Per-frame FPS for the 'better' run: reciprocal of each frame's 'sec' value,
# taken in dict order. The first frame is skipped because its timing
# includes cold-start latency.
better_list = [1 / entry['sec'] for entry in list(better_dict.values())[1:]]

In [13]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

In [14]:
import pandas as pd

In [15]:
# Assemble the three FPS series into one frame: each list becomes a labelled
# row, then the frame is transposed so rows are frames and columns are runs.
# Lists of unequal length are padded with NaN by the DataFrame constructor.
fps_rows = [true_list, inferred_list, better_list]
df = pd.DataFrame(fps_rows, index=['true', 'inferred', 'better']).T
df.head()

Unnamed: 0,true,inferred,better
0,10.006833,6.141523,4.630809
1,10.233229,5.895422,4.918706
2,10.520319,5.731685,4.934417
3,10.790039,6.370788,5.045209
4,8.121335,6.47483,5.028382


In [16]:
# df.describe().to_html()

In [17]:
# One trace specification per DataFrame column: x is the row index,
# y is that column's FPS values, and the column name labels the trace.
data = [
    dict(x=df.index, y=df[series_name], name=series_name)
    for series_name in df.columns
]

In [18]:
# Figure layout: overall title plus axis captions (frame number vs. fps).
layout = go.Layout(
    title='Face Extraction (true v. inferred v. better)',
    xaxis={'title': 'frame number'},
    yaxis={'title': 'fps'},
)

In [19]:
# Combine the per-column traces (`data`) and the axis/title settings (`layout`) into one figure.
fig = go.Figure(data=data, layout=layout)

In [20]:
# Render the figure in the notebook via `py`, i.e. plotly.plotly as imported above.
# NOTE(review): plotly.plotly is presumably the online (hosted) plotting API, which
# needs credentials and network access; it was moved out of the plotly package in
# v4 (to chart_studio) -- confirm the installed plotly version before re-running.
py.iplot(fig)


Consider using IPython.display.IFrame instead



## Accuracy

In [15]:
# Parse 'clip-Appearances.txt' into {actor id: [start, end]}.
# Each non-blank line must hold at least three comma-separated integers:
# the actor id followed by two frame numbers. The pair is used downstream as
# range(start, end) bounds over frame numbers.
appearances = {}
with open('clip-Appearances.txt', 'r') as f:  # context manager closes the file
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing on int('').
            continue
        fields = line.split(',')
        actor = int(fields[0])
        appearances[actor] = [int(fields[1]), int(fields[2])]

In [16]:
# Display the parsed mapping: actor id -> [start frame, end frame].
appearances

{0: [107, 292], 1: [763, 1018]}

In [19]:
# Detection counts for the 'true' run over every frame in which an actor is
# known to appear.
#   tp: face present and at least one face box reported
#   fn: face present but coords_list is empty
# NOTE(review): range() excludes the end frame of each appearance -- confirm
# the end value in the appearances file is meant to be exclusive.
tp = fn = 0

for start, stop in appearances.values():
    for fnum in range(start, stop):
        # len() rather than truthiness: coords_list may be an array-like.
        if len(true_dict[fnum]['coords_list']) == 0:
            fn += 1
        else:
            tp += 1

In [20]:
# Raw counts: (frames with a detected face, frames with a missed face).
tp, fn

(257, 183)

In [21]:
# Share of known-face frames that were detected (recall) and missed (miss rate).
total = tp + fn
tp / total, fn / total

(0.5840909090909091, 0.4159090909090909)

In [22]:
# Detection counts for the 'inferred' run over every frame in which an actor
# is known to appear.
#   tp: face present and at least one face box reported
#   fn: face present but coords_list is empty
# fn_frames keeps the missed frame numbers, in scan order, for later inspection.
candidate_frames = [
    fnum
    for start, stop in appearances.values()
    for fnum in range(start, stop)
]

# len() rather than truthiness: coords_list may be an array-like.
fn_frames = [
    fnum
    for fnum in candidate_frames
    if len(inferred_dict[fnum]['coords_list']) == 0
]

fn = len(fn_frames)
tp = len(candidate_frames) - fn

In [23]:
# Raw counts: (frames with a detected face, frames with a missed face).
tp, fn

(185, 255)

In [24]:
# Share of known-face frames that were detected (recall) and missed (miss rate).
total = tp + fn
tp / total, fn / total

(0.42045454545454547, 0.5795454545454546)

This poor accuracy is likely because the body detector sometimes fails, in which case no face can be found in that frame. If so, we can remedy it by using a more accurate body detector (options: YOLOv3, etc.).

In [25]:
# To confirm our suspicions, let's look at the false negatives reported by the inferred version.

In [26]:
# Break down the inferred run's false-negative frames by whether a body was
# detected, using the per-frame 'found_body' / 'found_face' flags.
no_body_no_face = 0   # neither body nor face found -- want to minimize this
yes_body_no_face = 0  # body found but no face -- want to minimize this

for fnum in fn_frames:
    record = inferred_dict[fnum]
    body = record['found_body']
    face = record['found_face']

    if face:
        # A set face flag contradicts the empty coords_list that put this
        # frame in fn_frames; do not count it as a "no face" case. If the two
        # counters sum to less than len(fn_frames), the records are inconsistent.
        continue

    if body:
        yes_body_no_face += 1
    else:
        no_body_no_face += 1

In [27]:
# Missed frames split by cause: (no body detected, body detected but no face).
no_body_no_face, yes_body_no_face

(54, 201)

So the body detector did not perform as poorly as I expected: I expected more instances of no_body_no_face. Still, 54/255 ≈ 21% of the missed faces stem from a body not being detected, while the remaining 201/255 ≈ 79% are frames where a body was found but no face was. It seems that both the face and body detectors need improvement.