In [90]:
ground_truth_filename = "/home/shared/workspace/Resnet3D/3D-ResNets-PyTorch/data/ntu_01.json"

In [91]:
prediction_filename = "/home/shared/workspace/human-activity-recognition/Efficient-3DCNNs/data/results/resnet_101_50_0.001test/val.json"

In [99]:
top_k = 2

In [78]:
import json
import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, classification_report


In [26]:
def load_groundtruth_pred(ground_truth_filename=None, prediction_filename=None, subset='validation', verbose=False, top_k=1):
    if not ground_truth_filename:
        raise IOError('Please input a valid ground truth file.')
    if not prediction_filename:
        raise IOError('Please input a valid prediction file.')

    ap = None
    hit_at_k = None
    # Import ground truth and predictions.
    ground_truth, activity_index = _import_ground_truth(ground_truth_filename)
    prediction = _import_prediction(prediction_filename, activity_index)

    if verbose:
        print('[INIT] Loaded annotations from {} subset.'.format(subset))
        nr_gt = len(ground_truth)
        print('\tNumber of ground truth instances: {}'.format(nr_gt))
        nr_pred = len(prediction)
        print('\tNumber of predictions: {}'.format(nr_pred))
        
    return ground_truth, prediction

In [21]:
def _import_ground_truth(ground_truth_filename):
    """Reads ground truth file, checks if it is well formatted, and returns
       the ground truth instances and the activity classes.
    Parameters
    ----------
    ground_truth_filename : str
        Full path to the ground truth json file.
    Outputs
    -------
    ground_truth : df
        Data frame containing the ground truth instances.
    activity_index : dict
        Dictionary containing class index.
    """
    with open(ground_truth_filename, 'r') as fobj:
        data = json.load(fobj)
    # Checking format
    # if not all([field in data.keys() for field in self.gt_fields]):
        # raise IOError('Please input a valid ground truth file.')

    # Initialize data frame
    activity_index, cidx = {}, 0
    video_lst, label_lst = [], []
    for videoid, v in data['database'].items():
        if 'validation' != v['subset']:
            continue
        this_label = v['annotations']['label']
        if this_label not in activity_index:
            activity_index[this_label] = cidx
            cidx += 1
        video_lst.append(videoid)
        label_lst.append(activity_index[this_label])
    ground_truth = pd.DataFrame({'video-id': video_lst,
                                 'label': label_lst})
    ground_truth = ground_truth.drop_duplicates().reset_index(drop=True)
    return ground_truth, activity_index

In [22]:
def _import_prediction(prediction_filename, activity_index):
    """Reads prediction file, checks if it is well formatted, and returns
       the prediction instances.
    Parameters
    ----------
    prediction_filename : str
        Full path to the prediction json file.
    Outputs
    -------
    prediction : df
        Data frame containing the prediction instances.
    """
    with open(prediction_filename, 'r') as fobj:
        data = json.load(fobj)
    # Checking format...
    # if not all([field in data.keys() for field in self.pred_fields]):
        # raise IOError('Please input a valid prediction file.')

    # Initialize data frame
    video_lst, label_lst, score_lst = [], [], []
    for videoid, v in data['results'].items():
        for result in v:
            label = activity_index[result['label']]
            video_lst.append(videoid)
            label_lst.append(label)
            score_lst.append(result['score'])
    prediction = pd.DataFrame({'video-id': video_lst,
                               'label': label_lst,
                               'score': score_lst})
    return prediction

In [92]:
ground_truth, prediction = load_groundtruth_pred(ground_truth_filename, prediction_filename, 'validation', True, top_k)

[INIT] Loaded annotations from validation subset.
	Number of ground truth instances: 1707
	Number of predictions: 5118


In [38]:
print(ground_truth.head)

<bound method NDFrame.head of                       video-id  label
0     S014C001P019R001A041_rgb      0
1     S016C003P021R001A041_rgb      0
2     S008C001P001R001A041_rgb      0
3     S013C003P016R001A041_rgb      0
4     S009C001P015R002A041_rgb      0
...                        ...    ...
1702  S016C003P019R002A049_rgb      8
1703  S015C003P008R002A049_rgb      8
1704  S001C002P006R002A049_rgb      8
1705  S013C001P007R002A049_rgb      8
1706  S009C003P007R002A049_rgb      8

[1707 rows x 2 columns]>


In [39]:
print(prediction.head)

<bound method NDFrame.head of                       video-id  label     score
0     S014C001P019R001A041_rgb      0  0.319168
1     S014C001P019R001A041_rgb      5  0.244419
2     S014C001P019R001A041_rgb      6  0.175087
3     S016C003P021R001A041_rgb      6  0.418693
4     S016C003P021R001A041_rgb      3  0.325127
...                        ...    ...       ...
5113  S001C002P006R002A049_rgb      4  0.204111
5114  S001C002P006R002A049_rgb      8  0.164063
5115  S013C001P007R002A049_rgb      8  0.782207
5116  S013C001P007R002A049_rgb      3  0.084218
5117  S013C001P007R002A049_rgb      6  0.048666

[5118 rows x 3 columns]>


In [97]:
def evaluate(ground_truth, prediction, top_k, verbose ):
    """Evaluates a prediction file. For the detection task we measure the
    interpolated mean average precision to measure the performance of a
    method.
    """
    hit_at_k, y_pred, y_true, video_name = compute_video_hit_at_k(ground_truth, prediction, top_k)
    if verbose:
        print('[RESULTS] Performance on ActivityNet untrimmed video '
               'classification task.')
        print('\tError@{}: {}'.format(top_k, 1.0 - hit_at_k))
        print('\n')
    
    if top_k == 1 :
        conf_mtx = confusion_matrix(y_true, y_pred)
        prec_rec = classification_report(y_true, y_pred)
        print('Confusion Matrix\n')
        print(conf_mtx)
        print('\n')
        print('Other metrics')
        print(prec_rec)
    return hit_at_k

In [80]:
def compute_video_hit_at_k(ground_truth, prediction, top_k=3):
    """Compute accuracy at k prediction between ground truth and
    predictions data frames. This code is greatly inspired by evaluation
    performed in Karpathy et al. CVPR14.
    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 'label']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id, 'label', 'score']
    Outputs
    -------
    acc : float
        Top k accuracy score.
    """
    video_ids = np.unique(ground_truth['video-id'].values)
    avg_hits_per_vid = np.zeros(video_ids.size)
    video_name, y_true,y_pred = [],[],[]
    for i, vid in enumerate(video_ids):
        pred_idx = prediction['video-id'] == vid
        if not pred_idx.any():
            continue
        this_pred = prediction.loc[pred_idx].reset_index(drop=True)
        # Get top K predictions sorted by decreasing score.
        sort_idx = this_pred['score'].values.argsort()[::-1][:top_k]
        this_pred = this_pred.loc[sort_idx].reset_index(drop=True)
        # Get labels and compare against ground truth.
        pred_label = this_pred['label'].tolist()
        gt_idx = ground_truth['video-id'] == vid
        gt_label = ground_truth.loc[gt_idx]['label'].tolist()
        avg_hits_per_vid[i] = np.mean([1 if this_label in pred_label else 0
                                       for this_label in gt_label])
        y_pred.append(pred_label)
        y_true.append(gt_label)
        video_name.append(vid)
    return float(avg_hits_per_vid.mean()), y_pred, y_true, video_name

In [98]:
evaluate(ground_truth, prediction, top_k, True )

[RESULTS] Performance on ActivityNet untrimmed video classification task.
	Error@2: 0.17750439367311077




0.8224956063268892