In [1]:
import os
import numpy as np
import re
import csv

import sys
sys.path.insert(0, '../..')
import metrics.charades_classify as cc
from datasets.prepare_dataset import load_labels_file

import matplotlib.pyplot as plt
from inspect import signature

from scipy import signal
from torch.nn import MaxPool1d

import torch

In [2]:
# gt_path = '/data/Datasets/Charades/Annotations/Charades_v1_causal_clip_test.txt'
classes_file = '/data/Datasets/Charades/Annotations/Charades_v1_classes.txt'
classes_att_file = '/data/Datasets/Charades/Annotations/Charades_v1_attributes_class.csv'
classes_map_file = '/data/Datasets/Charades/Annotations/Charades_v1_mapping.txt'
# gt_path = '/data/Datasets/Charades/Annotations/Charades_v1_causal_allFrames_test.txt'
gt_path = '/data/Datasets/Charades/Annotations/Charades_v1_causal_test.txt'

per_frame = True

# Load classes
classes = load_labels_file(classes_file)

In [16]:
dir_name = "/data/OnlineActionRecognition/final_outputs/"
files_dir =  "causal_eval_charades_resnet50nl32_config1_valFullyConv"

result_files = sorted([os.path.join(dir_name, files_dir, f) for f in os.listdir(dir_name + files_dir) if f.endswith('.txt')])

test_ids = []
test_classes = []
for fname in result_files:
    print(fname)
    ids, classes = cc.read_file(fname)
    test_ids += ids
    test_classes += classes
    print(len(test_ids), len(test_classes))

/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charades_resnet50nl32_config1_0.txt
134706 134706
/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charades_resnet50nl32_config1_1.txt
282896 282896
/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charades_resnet50nl32_config1_2.txt
430820 430820
/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charades_resnet50nl32_config1_3.txt
575759 575759
/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charades_resnet50nl32_config1_4.txt
716759 716759
/data/OnlineActionRecognition/final_outputs/causal_eval_charades_resnet50nl32_config1_valFullyConv/causal_eval_valFullyConv_charad

In [4]:
def divide_per_clip(ids, classes):
    clips_ids = []
    clips_classes = []

    video_name = None
    for i, video_frame in enumerate(ids):
        name = video_frame.split('_')[0]
        if name != video_name:
            # new video! But first, save old video
            if i > 0:
                clips_ids.append(np.array(video_ids))
                clips_classes.append(np.array(video_classes))
            # star new one
            video_ids = []
            video_classes = []
            video_name = name
            
        video_ids.append(video_frame)
        video_classes.append(classes[i])
        
    # Append last video
    clips_ids.append(np.array(video_ids))
    clips_classes.append(np.array(video_classes))
        
    return np.array(clips_ids), np.array(clips_classes)

In [17]:
gt_ids, gt_classes = cc.read_file(gt_path) 
gt_classes = np.array(gt_classes)

n_test = len(gt_ids)

# Check if there are duplicate items
test_ids2, test_index_order = np.unique(test_ids, return_index=True)
test_classes2 = np.array(test_classes)[test_index_order]

# Dividing per clip
gt_clips_ids, gt_clips_classes = divide_per_clip(gt_ids, gt_classes)
test_clips_ids, test_clips_classes = divide_per_clip(test_ids2, test_classes2)

In [6]:
def video_output(outputs):
    num_clips, num_classes = outputs.shape
    max_pool = MaxPool1d(num_clips)
    
    outputs = torch.tensor(outputs)

    data = outputs.view(1, -1, num_classes).contiguous()
    data = data.permute(0, 2, 1).contiguous()

    data = max_pool(data)
    video_data = data.view(num_classes).contiguous()

    return video_data

def select_n_clips(video_classes, n=10):
    num_frames = len(video_classes)
    ids = np.linspace(0, num_frames-1, n, dtype=int)
    
    return video_classes[ids]

In [7]:
def map_func(submission_array, gt_array):
    """ Returns mAP, weighted mAP, AP array, precisions, recall and calibrated AP"""
    m_aps = []
    c_aps = []
    fprs = []
    a_prec = np.zeros(submission_array.shape)
    a_recall = np.zeros(submission_array.shape)
    n_samples = submission_array.shape[0]
    n_classes = submission_array.shape[1]
    for oc_i in range(n_classes):
        sorted_idxs = np.argsort(-submission_array[:, oc_i])
        sorted_gt = gt_array[:, oc_i][sorted_idxs]
        tp = sorted_gt == 1
        fp = np.invert(tp)
        n_pos = tp.sum()
        n_gt = sorted_gt.sum()

        t_pcs = np.cumsum(tp)
        f_pcs = np.cumsum(fp)
        prec = t_pcs / (f_pcs+t_pcs).astype(float)
        recall = t_pcs / n_gt.astype(float)
        
        fpr = f_pcs / (n_samples - n_gt).astype(float)
        c_prec = recall / (recall + fpr)
        
        fprs.append(c_prec)
        # Calibrated prec
#         w = (n_samples - n_gt) / float(n_gt)
#         c_t_pcs = t_pcs * w
#         c_prec = c_t_pcs / (f_pcs + c_t_pcs).astype(float)

        avg_prec = 0
        c_avg_prec = 0
        for i in range(submission_array.shape[0]):
            if tp[i]:
                avg_prec += prec[i]
                c_avg_prec += c_prec[i]
        m_aps.append(avg_prec / n_pos.astype(float))
        c_aps.append(c_avg_prec / n_pos.astype(float))
        a_prec[:, oc_i] = prec
        a_recall[:, oc_i] = recall
    m_aps = np.array(m_aps)
    c_aps = np.array(c_aps)
    m_ap = np.nanmean(m_aps)
    c_ap = np.nanmean(c_aps)
    w_ap = np.nansum(m_aps * gt_array.sum(axis=0) / gt_array.sum().astype(float))
    return m_ap, w_ap, m_aps, a_prec, a_recall, c_ap


def charades_map(submission_array, gt_array):
    """
    Approximate version of the charades evaluation function
    For precise numbers, use the submission file with the official matlab script
    """

    fix = submission_array.copy()
    empty = np.sum(gt_array, axis=1) == 0
    fix[empty, :] = np.NINF

    return map_func(fix, gt_array)

In [21]:
len(test_classes), gt_classes.shape

(1153512, (1153408, 157))

In [18]:
# per frame using mean of all clips
mean_map, mean_wap, mean_ap, mean_prec, mean_rec, mean_cap = charades_map(test_classes2, gt_classes)

# per clip result
test_classes_n = [select_n_clips(np.array(clip_data), n=10) for clip_data in test_clips_classes]

test_clip_n_mean = np.array([video_output(t_c).numpy() for t_c in test_classes_n])
gt_clips_n_mean = np.array([(sum(gt_c) > 0).astype(int) for gt_c in gt_clips_classes])

map_10_mean, wap_10_mean, _, _, _, cap_10_mean = charades_map(test_clip_n_mean, gt_clips_n_mean)

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(mean_map, mean_cap))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(map_10_mean, cap_10_mean))

IndexError: boolean index did not match indexed array along dimension 0; dimension is 36 but corresponding boolean dimension is 1153408

In [None]:
results = {
    'map': mean_map,
    'cap': mean_cap,
    'map_10': map_10_mean, 
    'cap_10': cap_10_mean,
    'wap': mean_wap, 
    'ap': mean_ap, 
    'precision': mean_prec, 
    'recal': mean_rec
}

output_file = dir_name + files_dir + '_results'
np.save(output_file, results)

In [25]:
baseline8_centerCrop = np.load(
    dir_name + 'causal_eval_charades_resnet50baseline8_config1_valCenterCrop_results.npy', allow_pickle=True)[()]
print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_centerCrop['map'], baseline8_centerCrop['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_centerCrop['map_10'], baseline8_centerCrop['cap_10']))

print('\n')

baseline8_centerCrop2 = np.load(
    dir_name + 'eval_pred_charades_r50i3d_baseline8_stream_centerCrop_results.npy', allow_pickle=True)[()]
print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_centerCrop2['map'], baseline8_centerCrop2['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_centerCrop2['map_10'], baseline8_centerCrop2['cap_10']))

Per frame:
mAP: 14.95%, cAP: 79.53%
Per clip:
mAP: 26.92%, cAP: 80.31%


Per frame:
mAP: 14.95%, cAP: 79.53%
Per clip:
mAP: 26.92%, cAP: 80.31%


In [15]:
baseline8_fullyConv = np.load(
    dir_name + 'causal_eval_charades_resnet50baseline8_config1_valFullyConv_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_fullyConv['map'], baseline8_fullyConv['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_fullyConv['map_10'], baseline8_fullyConv['cap_10']))

print('\n')

baseline8_fullyConv2 = np.load(
    dir_name + 'causal_eval_charades_resnet50baseline8_config1_valFullyConv_print_results.npy', allow_pickle=True)[()]
print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_fullyConv2['map'], baseline8_fullyConv2['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline8_fullyConv2['map_10'], baseline8_fullyConv2['cap_10']))

Per frame:
mAP: 14.07%, cAP: 76.87%
Per clip:
mAP: 24.99%, cAP: 77.62%


Per frame:
mAP: 13.20%, cAP: 75.30%
Per clip:
mAP: 24.13%, cAP: 76.58%


In [23]:
nl8_centerCrop = np.load(
    dir_name + 'eval_pred_charades_r50i3d_nl8_stream_centerCrop_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl8_centerCrop['map'], nl8_centerCrop['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl8_centerCrop['map_10'], nl8_centerCrop['cap_10']))

Per frame:
mAP: 17.17%, cAP: 81.98%
Per clip:
mAP: 30.28%, cAP: 82.49%


In [24]:
nl8_fullyConv = np.load(
    dir_name + 'causal_eval_charades_resnet50nl8_config1_valFullyConv_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl8_fullyConv['map'], nl8_fullyConv['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl8_fullyConv['map_10'], nl8_fullyConv['cap_10']))

Per frame:
mAP: 17.17%, cAP: 81.98%
Per clip:
mAP: 30.28%, cAP: 82.49%


In [23]:
baseline32_centerCrop = np.load(
    dir_name + 'causal_eval_charades_resnet50baseline32_config1_centerCrop_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline32_centerCrop['map'], baseline32_centerCrop['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline32_centerCrop['map_10'], baseline32_centerCrop['cap_10']))

Per frame:
mAP: 17.39%, cAP: 82.26%
Per clip:
mAP: 31.05%, cAP: 83.00%


In [10]:
baseline32_fullyConv = np.load(
    dir_name + 'causal_eval_charades_resnet50baseline32_config1_fullyConv_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline32_fullyConv['map'], baseline32_fullyConv['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(baseline32_fullyConv['map_10'], baseline32_fullyConv['cap_10']))

Per frame:
mAP: 16.93%, cAP: 81.08%
Per clip:
mAP: 29.41%, cAP: 81.16%


In [10]:
nl32_centerCrop = np.load(
    dir_name + 'causal_eval_charades_resnet50nl32_full_config1_valcentercrop_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl32_centerCrop['map'], nl32_centerCrop['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl32_centerCrop['map_10'], nl32_centerCrop['cap_10']))

Per frame:
mAP: 18.07%, cAP: 82.89%
Per clip:
mAP: 31.99%, cAP: 83.73%


In [15]:
nl32_fullyConv = np.load(
    dir_name + 'causal_eval_charades_resnet50nl32_full_config1_valFullyConv_results.npy', allow_pickle=True)[()]

print('Per frame:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl32_fullyConv['map'], nl32_fullyConv['cap']))
print('Per clip:')
print('mAP: {:4.2%}, cAP: {:4.2%}'.format(nl32_fullyConv['map_10'], nl32_fullyConv['cap_10']))

Per frame:
mAP: 19.02%, cAP: 83.48%
Per clip:
mAP: 32.76%, cAP: 83.99%
