# Dataset demonstration

This notebook shows you how to read the data using GulpIO and visualise the frames (both RGB and optical flow U/V pairs).

In [236]:
from gulpio2 import GulpDirectory
from pathlib import Path
from moviepy.editor import ImageSequenceClip, clips_array

from collections import defaultdict

import argparse
import logging
import os
import pickle
import pandas as pd
from pathlib import Path

import colorlog
import torch as t
import numpy as np
from omegaconf import OmegaConf
from pytorch_lightning import Callback, Trainer
from typing import Any, Dict, List, Sequence, Union

from systems import EpicActionRecogintionDataModule
from systems import EpicActionRecognitionSystem

from test import ResultsSaver
from scipy.special import softmax

In [195]:
# SETUP LOGGING
LOG = logging.getLogger("test")

logging.basicConfig(level=logging.INFO)

handler = colorlog.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter("%(log_color)s%(levelname)s:%(name)s:%(message)s"))

logger = colorlog.getLogger("example")
logger.addHandler(handler)

# SETUP TORCH VARIABLES
device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
dtype = t.float

In [196]:
# LOAD IN SAVED CHECKPOINT
ckpt = t.load('../models/trn_rgb.ckpt', map_location='cpu')

# CREATE CONFIG FROM CHECKPOINT
cfg = OmegaConf.create(ckpt['hyper_parameters'])
OmegaConf.set_struct(cfg, False)

# SET GULP DIRECTORY
cfg.data._root_gulp_dir = '/home/ts/C1-Action-Recognition-TSN-TRN-TSM/datasets/epic/gulp/rgb'

# CREATE MODEL
model = EpicActionRecognitionSystem(cfg)
model.load_state_dict(ckpt['state_dict'])
model.to(device)


INFO:models.tsn:Initializing MTRN with base model: resnet50.

MTRN Configuration:
    input_modality:     RGB
    num_segments:       8
    segment_length:     1
    consensus_module:   TRNMultiscale
    img_feature_dim:    256 (only valid for TRN)
    dropout_ratio:      0.7
    partial_bn:         True
        
INFO:models.tsn:Loading backbone model with imagenet weights


Multi-Scale Temporal Relation Network Module in use ['8-frame relation', '7-frame relation', '6-frame relation', '5-frame relation', '4-frame relation', '3-frame relation', '2-frame relation']


EpicActionRecognitionSystem(
  (model): MTRN(
    (base_model): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (re

In [197]:
# FOR WHOLE GULPED DATA

# if not cfg.get("log_graph", True):
#     try:
#         delattr(system, "example_input_array")
#     except AttributeError:
#         pass

# LOG.info("Disabling DP/DDP")
# cfg.trainer.accelerator = None
    
# n_gpus = 1

# cfg.trainer.gpus = n_gpus
# cfg['test.results_path'] = '../outputs/exp.pt'

In [198]:
# data_module = EpicActionRecogintionDataModule(cfg)

# dataloader = data_module.test_dataloader()

# saver = ResultsSaver()

# trainer = Trainer(**cfg.trainer, callbacks=[saver])
# trainer.test(system, test_dataloaders=dataloader)
# out = system.forward()

In [199]:
# SET GULP ROOT FOR gulpio2
gulp_root = Path.home()

# LOAD GULPED DATA
rgb_train = GulpDirectory('../datasets/epic/gulp/rgb/rgb_test/')#GulpDirectory('/home/will/P01_rgb_gulp/')

In [200]:
# flow_train = GulpDirectory('../datasets/epic/gulp/flow_train/')#GulpDirectory('/home/will/P01_flow_gulp/')

In [201]:
# SELECT SINGLE CLIP TO PROCESS (uncomment for full list of ids)
# rgb_train.merged_meta_dict.keys()
clip_id = 'P01_01_96'

In [202]:
# LOAD FRAMES AND META FOR SELECTED VIDEO
rgb_frames, rgb_meta = rgb_train[clip_id]
# flow_frames, flow_meta = flow_train[clip_id]

In [203]:
# RESHAPE INPUT DATA FOR RUNNING THROUGH MODEL

rgb_frames = np.array(rgb_frames).transpose(0,3,1,2)
rgb_frames = t.tensor(rgb_frames, device=device, dtype=dtype)
rgb_frames = rgb_frames.unsqueeze(0)

# RUN FRAMES THROUGH MODEL
with t.no_grad():
    result = model.forward_tasks(rgb_frames)

In [207]:
with open('result.pkl', 'wb') as f:
  pickle.dump(result, f)
# probs = t.softmax(result, -1)
# pred_noun, pred_verb = out[:97].argmax().item(), out[97:].argmax().item()
# pred_noun, pred_verb # = out[pred].item(), probs[pred].item()

In [240]:
def top_scores(scores: np.ndarray, top_n: int = 100):
    if scores.ndim == 1:
        top_n_idx = scores.argsort()[::-1][:top_n]
        return top_n_idx, scores[top_n_idx]
    else:
        top_n_scores_idx = np.argsort(scores)[:,::-1][:top_n]#scores.argsort()[:,::-1][:,:top_n]
        return top_n_scores_idx, scores[np.arrange(0, len(scores)).reshape(-1,1), top_n_scores_idx]
    
def compute_scores(verb_scores: np.ndarray, noun_scores: np.ndarray, top_n: int = 100):
    top_verbs, top_verb_scores = top_scores(verb_scores, top_n)
    top_nouns, top_noun_scores = top_scores(noun_scores, top_n)
    top_verb_probs, top_noun_probs = softmax(top_verb_scores), softmax(top_noun_scores)
    action_probs_matrix = (top_verb_probs[:,:,np.newaxis] * top_noun_probs[:,np.newaxis,:])
    instance_count = action_probs_matrix.shape[0]
    action_ranks = action_probs_matrix.reshape(instance_count, -1).argsort(axis=-1)[:,::-1]
    verb_rans_idx, noun_ranks_idx = np.unravel_index(action_ranks[:,:top_n],shape=(action_probs_matrix[1:]))
    segments = np.arrange(0, instance_count).reshape(-1,1)
    
    return ((top_verbs[segments, verb_ranks_idx], top_nouns[segments, noun_ranks_idx]), 
            action_probs_matrix.reshape(instance_count,-1)[segments,action_ranks[:,:top_n]])

In [245]:
# labels: pd.DataFrame = pd.read_pickle('../datasets/epic/labels/EPIC_100_validation.pkl')
labels_path = '../datasets/epic/labels/'

unseen_participants = pd.read_csv(labels_path+'EPIC_100_unseen_participant_ids_test.csv', index_col='participant_id').index.values
tail_verb_classes = pd.read_csv(labels_path+'EPIC_100_tail_verbs.csv', index_col='verb').index.values
tail_noun_classes = pd.read_csv(labels_path+'EPIC_100_tail_nouns.csv', index_col='noun').index.values

result
(verbs, nouns), _scores = compute_scores(result['verb'].cpu(), result['noun'].cpu(), top_n=100)

ValueError: step must be greater than zero

In [82]:
def display_rgb(rgb_frames, fps=50):
    return ImageSequenceClip(rgb_frames, fps=fps)

In [None]:
def display_flow(flow_frames, fps=50):
    u_frames = flow_frames[::2]
    v_frames = flow_frames[1::2]
    
    def flow_to_clip(flow):
        # Convert optical flow magnitude to greyscale RGB
        return ImageSequenceClip(list(np.stack([flow] * 3, axis=-1)), fps=fps)
    
    u_clip = flow_to_clip(u_frames) 
    v_clip = flow_to_clip(v_frames) 
    return clips_array([[u_clip, v_clip]])

In [None]:
# clips_array([[display_rgb(rgb_frames), display_flow(flow_frames)]]).ipython_display()
display_rgb(rgb_frames).ipython_display()

In [164]:
rgb_meta

{'narration_id': 'P01_01_96',
 'participant_id': 'P01',
 'video_id': 'P01_01',
 'narration_timestamp': '00:07:46.520',
 'start_timestamp': '00:07:49.14',
 'stop_timestamp': '00:07:50.32',
 'start_frame': 28148,
 'stop_frame': 28219,
 'narration': 'take glass',
 'verb': 'take',
 'verb_class': 0,
 'noun': 'glass',
 'noun_class': 10,
 'all_nouns': ['glass'],
 'all_noun_classes': [10],
 'frame_size': [256, 456, 3],
 'num_frames': 72}

In [None]:
clip_id = 'P01_06_90'

In [None]:
rgb_frames, rgb_meta = rgb_train[clip_id]

In [None]:
flow_frames, flow_meta = flow_train[clip_id]

In [None]:
clips_array([[display_rgb(rgb_frames, fps=60), display_flow(flow_frames, fps=30)]]).ipython_display()

In [None]:
rgb_meta