In [1]:
import numpy as np
import pandas as pd
import glob, os
import itertools
import matplotlib.image as mpimg
import pickle
import math
import json

# import classifier

%matplotlib inline
import matplotlib.pyplot as plt

# Sentinel meaning "no frame": used as the default for start1/start2 and substituted
# for a missing detection, so that `< MAX_FRAME_NUM` tests whether a real value exists.
MAX_FRAME_NUM = 1000000

In [2]:
# TSD dataset
#
# Builds `failure_starts`: one row per video id found in the jsons directory, with
# start1/start2 defaulting to MAX_FRAME_NUM and overwritten by the values from
# failure_starts.csv where that file has a row for the id. `vids` is the resulting
# list of ids as strings.

# Keep only *.json entries so that any other directory entry (e.g. a hidden file)
# cannot break the int() conversion of the file stem below.
json_file_names = [
    name
    for name in os.listdir('/Users/kenneth/Projects/ga-ml/tsd/jsons/')
    if name.endswith('.json')
]
failure_starts = pd.DataFrame(json_file_names)
# regex=False: '.json' is a literal here; interpreted as a regex the '.' would match
# any character, and the default for `regex` differs between pandas versions.
failure_starts[0] = failure_starts[0].str.replace('.json', '', regex=False).map(int)
failure_starts = failure_starts.set_index(0)
failure_starts['start1'] = MAX_FRAME_NUM
failure_starts['start2'] = MAX_FRAME_NUM

selected_failure_starts = pd.read_csv('/Users/kenneth/Projects/ga-ml/tsd/failure_starts.csv', index_col=0, names=['start1', 'start2'])
selected_failure_starts['start1'] = selected_failure_starts['start1']*25/30 # account for the 25fps vs 30 fps bug in the code.
selected_failure_starts['start2'] = selected_failure_starts['start2']*25/30 # account for the 25fps vs 30 fps bug in the code.

# Restrict to the id range covered by the labels.
# NOTE(review): 4572 is an unexplained lower bound on the video id -- confirm its origin.
max_id = max(selected_failure_starts.index)
failure_starts = failure_starts[failure_starts.index <= max_id]
failure_starts = failure_starts[failure_starts.index >= 4572]

# In-place overlay of the labelled starts onto the defaults (aligned on video id).
failure_starts.update(selected_failure_starts)
# A negative start2 is replaced by the "no frame" sentinel.
failure_starts.loc[failure_starts['start2'] < 0, 'start2'] = MAX_FRAME_NUM

vids = [str(i) for i in failure_starts.index.values.tolist()]


In [3]:
# Rows whose start1 is below the MAX_FRAME_NUM sentinel, i.e. videos with a labelled start1.
failure_starts[failure_starts.start1 < MAX_FRAME_NUM]

Unnamed: 0_level_0,start1,start2
0,Unnamed: 1_level_1,Unnamed: 2_level_1
6165,2590.833333,3518.333333
10664,425.0,454.166667
5909,174.166667,220.833333
6112,1041.666667,1070.0
5441,25.0,1000000.0
6282,34.166667,1000000.0
5553,885.0,1000000.0
5800,165.0,1000000.0
4728,28.333333,1000000.0
5247,370.833333,499.166667


In [4]:
def load_df_from_json(fn):
    """Load the 'fields' objects of a JSON file into a DataFrame.

    Parameters
    ----------
    fn : str
        Path to a JSON file whose top-level value is an iterable of entries.
        Entries that do not contain a 'fields' key are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per entry that has a 'fields' key, built from that entry's
        'fields' value. An empty DataFrame is returned when `fn` is not an
        existing file.
    """
    if not os.path.isfile(fn):
        return pd.DataFrame()

    # The context manager closes the handle even if json.load raises.
    with open(fn, 'r') as json_file:
        entries = json.load(json_file)

    records = [entry['fields'] for entry in entries if 'fields' in entry]
    return pd.DataFrame(records)

# def validate_ewm(fn):
#     df = load_df_from_json(fn)
#     if df.empty:
#         return
#     ser = ((df['ewm_mean'] - df['rolling_ewm'])[20:] > 1e-03)
#     return ser[ser].count() == 0

# for vid in alerted.index.values:
#     print(vid)
#     print(validate_ewm('/mnt/ml-data4/tsd/jsons/{}.json'.format(vid)))

def load_detections(vid):
    """Return the detections DataFrame for video `vid`.

    The frame is read with load_df_from_json from the '<vid>.json' file in the
    jsons directory.
    """
    json_path = '/Users/kenneth/Projects/ga-ml/tsd/jsons/{}.json'.format(vid)
    return load_df_from_json(json_path)

# Thresholds read by is_failing.
settings = {
    'THRESHOLD_LOW': 0.11,
    'THRESHOLD_HIGH': 0.65,
    'INIT_SAFE_FRAME_NUM': 30,
    'ROLLING_MEAN_SHORT_MULTIPLE': 5.43,
}

def is_failing(prediction, detective_sensitivity=1.0):
    """Decide whether a single prediction counts as failing.

    `prediction` must expose the attributes current_frame_num, ewm_mean,
    rolling_mean_long and rolling_mean_short. `detective_sensitivity` scales
    ewm_mean before it is compared against the thresholds in `settings`.

    Returns False while current_frame_num is below INIT_SAFE_FRAME_NUM, or when
    the scaled ewm_mean is below rolling_mean_long + THRESHOLD_LOW. Otherwise
    returns True if the scaled ewm_mean exceeds rolling_mean_long + THRESHOLD_HIGH
    or exceeds rolling_mean_short * ROLLING_MEAN_SHORT_MULTIPLE, else False.
    """
    if prediction.current_frame_num < settings['INIT_SAFE_FRAME_NUM']:
        return False

    score = prediction.ewm_mean * detective_sensitivity
    baseline = prediction.rolling_mean_long

    if score < settings['THRESHOLD_LOW'] + baseline:
        return False

    # `or` short-circuits, so rolling_mean_short is only read when the high
    # threshold has not already been exceeded.
    return bool(
        score > settings['THRESHOLD_HIGH'] + baseline
        or score > prediction.rolling_mean_short * settings['ROLLING_MEAN_SHORT_MULTIPLE']
    )

def detect_failure_start(vid, plot=False):
    """Return the index label of the first detection row flagged by is_failing.

    Parameters
    ----------
    vid : video id, passed to load_detections.
    plot : bool
        When True, plot the current_p, ewm_mean, rolling_mean_short and
        rolling_mean_long columns of the detections.

    Returns
    -------
    The index label of the first row for which is_failing is true, or None
    when there are no detections for `vid` or no row is flagged.
    """
    det = load_detections(vid)
    if det.empty:
        # Nothing to evaluate (e.g. the JSON file is missing): report "no failure
        # detected" instead of failing on the column selection / row-wise apply below.
        return None

    if plot:
        det[['current_p', 'ewm_mean', 'rolling_mean_short', 'rolling_mean_long']].plot()

    det['is_failing'] = det.apply(is_failing, axis=1)
    failing_frames = det[det['is_failing']].index

    if len(failing_frames) > 0:
        return failing_frames[0]
    return None


In [11]:
# Evaluate the detector on every video in `vids`: compare the first index flagged by
# detect_failure_start against the labelled start1/start2 of that video and put the
# video id into one of the fp / fn / tn / tp lists.

# Slack applied around the labelled window: a detection earlier than
# start1 - START1_RELAX is a false positive, one later than start2 + START2_RELAX
# is a false negative.
START1_RELAX = 2
START2_RELAX = 30

plot=False
# if plot:
#     plt.style.use('ggplot')
#     fig=plt.figure(figsize=(16, 16*math.ceil(len(vids)/7)))
fp = []
fn = []
tn = []
tp = []
for vid in vids:

    detected_failure_start = detect_failure_start(vid, plot)
    # No detection is represented by the MAX_FRAME_NUM sentinel so the comparisons
    # below work on numbers only. `is None` is the idiomatic identity test.
    if detected_failure_start is None:
        detected_failure_start = MAX_FRAME_NUM

    label_row = failure_starts.loc[int(vid)]
    start1 = label_row.start1
    start2 = label_row.start2
    if plot:
        # Vertical markers: yellow = start1, red = start2, blue = detected start.
        if start1 < MAX_FRAME_NUM:
            plt.plot([start1, start1],[0,1], linestyle='-', color='y')
        if start2 < MAX_FRAME_NUM:
            plt.plot([start2, start2],[0,1], linestyle='-', color='r')
        if detected_failure_start < MAX_FRAME_NUM:
            plt.plot([detected_failure_start, detected_failure_start],[0,1], linestyle='-', color='b')

    print(vid, detected_failure_start, start1, start2)
    # NOTE(review): when start1 is labelled, start2 is the sentinel and nothing was
    # detected, none of the first three branches match and the video is counted as
    # 'tp' -- confirm that this is the intended treatment of such videos.
    if detected_failure_start < start1 - START1_RELAX :
        print('fp')
        fp.append(vid)
    elif detected_failure_start > start2 + START2_RELAX:
        print('fn')
        fn.append(vid)
    elif start1 >= MAX_FRAME_NUM:
        print('tn')
        tn.append(vid)
    else:
        print('tp')
        tp.append(vid)

(fp, fn, tn, tp)

4772 1000000 1000000.0 1000000.0
tn
4637 1000000 1000000.0 1000000.0
tn
10367 1000000 1000000.0 1000000.0
tn
5476 1000000 1000000.0 1000000.0
tn
5708 1000000 1000000.0 1000000.0
tn
5358 1000000 1000000.0 1000000.0
tn
5837 1000000 1000000.0 1000000.0
tn
4725 1000000 1000000.0 1000000.0
tn
6348 1000000 1000000.0 1000000.0
tn
10625 150 1000000.0 1000000.0
fp
5134 1000000 1000000.0 1000000.0
tn
10275 1000000 1000000.0 1000000.0
tn
5071 1000000 1000000.0 1000000.0
tn
4660 1000000 1000000.0 1000000.0
tn
6132 195 1000000.0 1000000.0
fp
5572 1000000 1000000.0 1000000.0
tn
10633 1000000 1000000.0 1000000.0
tn
5821 1000000 1000000.0 1000000.0
tn
4676 1000000 1000000.0 1000000.0
tn
5964 1000000 1000000.0 1000000.0
tn
5437 1000000 1000000.0 1000000.0
tn
5067 1000000 1000000.0 1000000.0
tn
6165 124 2590.8333333333335 3518.3333333333335
fp
4764 216 1000000.0 1000000.0
fp
6309 1000000 1000000.0 1000000.0
tn
10664 464 425.0 454.1666666666667
tp
5175 1000000 1000000.0 1000000.0
tn
5030 1000000 1000000.

5031 1000000 1000000.0 1000000.0
tn
4620 1000000 1000000.0 1000000.0
tn
5932 1000000 1000000.0 1000000.0
tn
5898 1000000 1000000.0 1000000.0
tn
10409 1000000 1000000.0 1000000.0
tn
6164 1000000 1000000.0 1000000.0
tn
5573 1000000 1000000.0 1000000.0
tn
10632 625 1000000.0 1000000.0
fp
5820 1000000 1000000.0 1000000.0
tn
4677 180 1000000.0 1000000.0
fp
5965 1000000 1000000.0 1000000.0
tn
5436 1000000 1000000.0 1000000.0
tn
5066 1000000 1000000.0 1000000.0
tn
6133 1000000 1000000.0 1000000.0
tn
5836 1000000 1000000.0 1000000.0
tn
4724 1000000 1000000.0 1000000.0
tn
6349 1000000 1000000.0 1000000.0
tn
5135 1000000 1000000.0 1000000.0
tn
10274 1000000 1000000.0 1000000.0
tn
5070 127 1000000.0 1000000.0
fp
4661 1000000 1000000.0 1000000.0
tn
5709 139 1000000.0 1000000.0
fp
5359 1000000 1000000.0 1000000.0
tn
5532 1000000 1000000.0 1000000.0
tn
4636 1000000 1000000.0 1000000.0
tn
10366 1000000 1000000.0 1000000.0
tn
5477 1000000 1000000.0 1000000.0
tn
5027 1000000 1000000.0 1000000.0
tn
6172

5810 2627 1000000.0 1000000.0
fp
6011 30 17.5 19.166666666666668
tp
5797 1000000 1000000.0 1000000.0
tn
4885 1000000 1000000.0 1000000.0
tn
10439 1000000 1000000.0 1000000.0
tn
5282 1000000 1000000.0 1000000.0
tn
5902 1000000 1000000.0 1000000.0
tn
6338 1000000 1000000.0 1000000.0
tn
6292 1000000 1000000.0 1000000.0
tn
10205 1000000 1000000.0 1000000.0
tn
10090 1000000 1000000.0 1000000.0
tn
6007 1000000 1000000.0 1000000.0
tn
10585 1000000 1000000.0 1000000.0
tn
10643 1000000 1000000.0 1000000.0
tn
4743 1000000 1000000.0 1000000.0
tn
6284 1000000 1000000.0 1000000.0
tn
6400 1000000 1000000.0 1000000.0
tn
4597 1770 1000000.0 1000000.0
fp
5386 1000000 1000000.0 1000000.0
tn
10182 242 1000000.0 1000000.0
fp
5369 1000000 1000000.0 1000000.0
tn
4981 1000000 1000000.0 1000000.0
tn
5693 1000000 1000000.0 1000000.0
tn
6115 1000000 1000000.0 1000000.0
tn
5410 921 1000000.0 1000000.0
fp
5040 1000000 1000000.0 1000000.0
tn
4651 560 1000000.0 1000000.0
fp
5806 1000000 1000000.0 1000000.0
tn
10244

10358 1000000 1000000.0 1000000.0
tn
4921 1000000 1000000.0 1000000.0
tn
5633 1000000 1000000.0 1000000.0
tn
10572 1000000 1000000.0 1000000.0
tn
5799 1000000 1000000.0 1000000.0
tn
6336 1000000 1000000.0 1000000.0
tn
6273 1000000 1000000.0 1000000.0
tn
5371 1000000 1000000.0 1000000.0
tn
10525 168 137.5 1000000.0
tp
5234 1000000 1000000.0 1000000.0
tn
10175 1000000 1000000.0 1000000.0
tn
5664 1000000 1000000.0 1000000.0
tn
6048 1000000 1000000.0 1000000.0
tn
5408 1000000 1000000.0 1000000.0
tn
10319 1000000 1000000.0 1000000.0
tn
5058 1000000 1000000.0 1000000.0
tn
6253 1000000 1000000.0 1000000.0
tn
5490 1000000 1000000.0 1000000.0
tn
5243 1000000 1000000.0 1000000.0
tn
10552 94 1000000.0 1000000.0
fp
4669 1000000 1000000.0 1000000.0
tn
5582 1000000 1000000.0 1000000.0
tn
5097 1000000 1000000.0 1000000.0
tn
4686 1000000 1000000.0 1000000.0
tn
5994 1000000 1000000.0 1000000.0
tn
6341 1000000 1000000.0 1000000.0
tn
10505 1000000 1000000.0 1000000.0
tn
5351 1000000 1000000.0 1000000.0
t

4803 29 1000000.0 1000000.0
fp
5896 1000000 1000000.0 1000000.0
tn
6306 1000000 1000000.0 1000000.0
tn
5480 1000000 1000000.0 1000000.0
tn
10542 1000000 1000000.0 1000000.0
tn
5603 1000000 1000000.0 1000000.0
tn
10112 1000000 1000000.0 1000000.0
tn
5802 124 1000000.0 1000000.0
fp
5551 1000000 1000000.0 1000000.0
tn
10240 1000000 1000000.0 1000000.0
tn
10305 1000000 1000000.0 1000000.0
tn
5414 1000000 1000000.0 1000000.0
tn
5947 153 146.66666666666666 1000000.0
tp
4655 1000000 1000000.0 1000000.0
tn
4985 1000000 1000000.0 1000000.0
tn
6111 1000000 1000000.0 1000000.0
tn
4593 1000000 1000000.0 1000000.0
tn
5382 1000000 1000000.0 1000000.0
tn
6280 96 1000000.0 1000000.0
fp
4747 1000000 1000000.0 1000000.0
tn
5910 1000000 1000000.0 1000000.0
tn
10581 1000000 1000000.0 1000000.0
tn
6003 1000000 1000000.0 1000000.0
tn
4751 1000000 1000000.0 1000000.0
tn
5843 1000000 1000000.0 1000000.0
tn
10201 1000000 1000000.0 1000000.0
tn
5140 1000000 1000000.0 1000000.0
tn
6279 59 1000000.0 1000000.0
fp


(['10625',
  '6132',
  '6165',
  '4764',
  '5899',
  '4924',
  '6364',
  '4841',
  '6190',
  '6344',
  '5354',
  '10500',
  '5712',
  '6305',
  '4768',
  '5250',
  '4986',
  '10347',
  '10202',
  '5952',
  '5816',
  '6152',
  '4895',
  '5046',
  '5553',
  '10239',
  '6332',
  '4925',
  '10632',
  '4677',
  '5070',
  '5709',
  '6172',
  '10290',
  '5485',
  '4914',
  '5368',
  '10479',
  '10438',
  '10253',
  '5684',
  '5759',
  '5535',
  '4723',
  '4689',
  '5173',
  '4963',
  '10530',
  '10422',
  '5959',
  '10198',
  '6158',
  '10423',
  '4919',
  '5133',
  '5563',
  '5810',
  '4597',
  '10182',
  '5410',
  '4651',
  '4915',
  '5813',
  '5405',
  '5281',
  '5794',
  '5278',
  '5385',
  '5983',
  '10504',
  '4687',
  '5491',
  '5665',
  '10565',
  '4672',
  '10363',
  '5536',
  '5218',
  '5998',
  '5062',
  '4624',
  '10134',
  '10552',
  '6091',
  '10378',
  '5039',
  '10303',
  '5042',
  '10207',
  '5957',
  '10483',
  '5819',
  '4588',
  '5233',
  '10133',
  '5435',
  '5966',
  '46

In [15]:
# Number of video ids the evaluation loop put into the `fn` (false negative) list.
len(fn)

6