In [1]:
from gulpio2 import GulpDirectory
from pathlib import Path
from moviepy.editor import ImageSequenceClip, clips_array

from collections import defaultdict

import pickle
import pandas as pd
from pathlib import Path

import torch as t
import torch.nn as nn
import numpy as np
from omegaconf import OmegaConf
from typing import Any, Dict, List, Sequence, Union

from systems import EpicActionRecognitionSystem

from utils.metrics import compute_metrics
from utils.actions import action_id_from_verb_noun
from scipy.special import softmax

from GPUtil import showUtilization as gpu_usage
from tqdm import tqdm

In [2]:
# SETUP TORCH VARIABLES
# Use the first CUDA GPU when one is visible to torch, otherwise stay on the CPU.
if t.cuda.is_available():
    device = t.device("cuda:0")
else:
    device = t.device("cpu")

# Floating-point type handed to the feature extractor when it builds input tensors.
dtype = t.float

In [3]:
def generate_model(
    ckpt_path='../models/trn_rgb.ckpt',
    gulp_dir='/home/ts/C1-Action-Recognition-TSN-TRN-TSM/datasets/epic/gulp/rgb',
):
    """Rebuild the action-recognition network from a saved checkpoint.

    Args:
        ckpt_path: Checkpoint file to read. Defaults to the TRN RGB checkpoint
            this notebook was written against.
        gulp_dir: Value written to ``cfg.data._root_gulp_dir`` before the
            system is constructed. The default is a machine-specific absolute
            path; pass your own location when running elsewhere.

    Returns:
        The ``model`` attribute of the restored ``EpicActionRecognitionSystem``,
        moved to the notebook-level ``device``.
    """
    # LOAD IN SAVED CHECKPOINT
    # Tensors are mapped to the CPU first; the network is moved to `device` at the end.
    # NOTE: t.load unpickles the file, so only load checkpoints from a trusted source.
    ckpt = t.load(ckpt_path, map_location='cpu')

    # CREATE CONFIG FROM CHECKPOINT
    cfg = OmegaConf.create(ckpt['hyper_parameters'])
    # Turn struct mode off so the assignment below is accepted even if the key
    # is not already part of the stored config.
    OmegaConf.set_struct(cfg, False)

    # SET GULP DIRECTORY
    cfg.data._root_gulp_dir = gulp_dir

    # CREATE MODEL
    system = EpicActionRecognitionSystem(cfg)
    system.load_state_dict(ckpt['state_dict'])
    return system.model.to(device)

In [None]:
class PickleFeatureWriter(FeatureWriter):
    """Work-in-progress accumulator for features destined for a pickle file.

    Only the constructor exists so far; it records the target path and the
    feature dimensionality and creates empty lists for ids, features and labels.

    NOTE(review): ``FeatureWriter`` is neither defined nor imported in the
    visible notebook cells - confirm where the base class comes from before
    running this cell.
    """

    def __init__(self, pkl_path: Path, features_dim: int):
        self.pkl_path = pkl_path
        # Previously accepted but never stored (the cell ended in a dangling `self.`).
        self.features_dim = features_dim
        self.narration_ids = []
        self.features = []
        self.labels = []

    # TODO: add the writer methods; the original cell stopped at an empty `def:` stub.

In [4]:
class FeatureExtractor:
    """Runs a backbone over gulped video segments and collects per-frame features.

    The extractor keeps a running frame counter (``start_frame`` / ``end_frame``)
    across segments so each metadata record stores the cumulative frame range
    of its segment.
    """

    def __init__(self, backbone_2d: nn.Module, device: t.device, dtype: t.dtype, frame_batch_size: int = 128):
        """Store the backbone and tensor settings.

        Args:
            backbone_2d: Network exposing ``features`` and ``new_fc`` callables.
            device: Device the input tensors are created on.
            dtype: Floating-point type of the input tensors.
            frame_batch_size: Upper bound on the number of frames pushed
                through the backbone in one forward pass.

        Raises:
            ValueError: If ``frame_batch_size`` is smaller than 1.
        """
        if frame_batch_size < 1:
            raise ValueError("frame_batch_size must be a positive integer")
        self.model = backbone_2d
        self.device = device
        self.dtype = dtype
        self.frame_batch_size = frame_batch_size
        # Cumulative frame offsets read and advanced by `_append`; they must
        # exist before the first segment is appended.
        self.start_frame = 0
        self.end_frame = 0

    def extract(self, dataset: "GulpDirectory", feature_writer: Dict[str, Any]) -> int:
        """Extract features for the segments of ``dataset`` into ``feature_writer``.

        Args:
            dataset: Iterable of chunks; each chunk yields ``(frames, labels)``
                pairs. Assumes ``frames`` converts to a (T, H, W, C) array -
                TODO confirm against the gulp directory layout.
            feature_writer: Dict with list-valued ``'features'`` and
                ``'metadata'`` entries that are extended in place.

        Returns:
            Number of segments processed.
        """
        total_instances = 0
        self.model.eval()
        for i, c in enumerate(tqdm(dataset)):
            # NOTE(review): only the first chunk is processed; later chunks are
            # iterated but skipped. This looks like a debugging limit - confirm
            # before relying on the output as the full dataset.
            if i != 0:
                continue
            for batch_input, batch_labels in c:
                # Move the last axis to position 1 (channels-first, assuming
                # the frames are channels-last) and add a leading batch axis of size 1.
                batch_input = np.array(batch_input).transpose(0, 3, 1, 2)
                batch_input = t.tensor(batch_input, device=self.device, dtype=self.dtype)
                batch_input = batch_input.unsqueeze(0)

                batch_size, n_frames = batch_input.shape[:2]
                # Merge the batch and frame axes so frames can be chunked together.
                flattened_batch_input = batch_input.view((-1, *batch_input.shape[2:]))

                # Honour the configured frame budget instead of a hard-coded 128.
                n_chunks = max(1, int(np.ceil(len(flattened_batch_input) / self.frame_batch_size)))
                chunks = t.chunk(flattened_batch_input, n_chunks, dim=0)
                flatten_batch_features = []
                with t.no_grad():
                    for chunk in chunks:
                        chunk = chunk.unsqueeze(0)
                        chunk_features = self.model.features(chunk.to(self.device))
                        chunk_features = self.model.new_fc(chunk_features)
                        flatten_batch_features.append(chunk_features.squeeze(0))
                flatten_batch_features = t.cat(flatten_batch_features, dim=0)
                # Restore the (batch, frame, ...) layout.
                batch_features = flatten_batch_features.view((batch_size,
                                                              n_frames,
                                                              *flatten_batch_features.shape[1:]))

                total_instances += batch_size
                self._append(batch_features, batch_labels, batch_size, feature_writer)
        return total_instances

    def _append(self, batch_features, batch_labels, batch_size, feature_writer):
        """Record one segment's features and metadata and advance the frame counter.

        ``feature_writer['metadata']`` is extended with a flat run of eight
        values per segment, in this order: narration_id, narration, verb,
        verb_class, noun, noun_class, start_frame, end_frame. ``np.array``
        receives a tuple that mixes the frame counters with the label values,
        so the stored entries are NumPy scalars, not a nested record.
        """
        self.end_frame += batch_labels['num_frames']
        feature_writer['features'].extend(batch_features.cpu().numpy())
        feature_writer['metadata'].extend(np.array((batch_labels['narration_id'],
                                                    batch_labels['narration'],
                                                    batch_labels['verb'],
                                                    batch_labels['verb_class'],
                                                    batch_labels['noun'],
                                                    batch_labels['noun_class'],
                                                    self.start_frame,
                                                    self.end_frame)))
        # The next segment starts where this one ended.
        self.start_frame = self.end_frame
        print(batch_labels['narration_id'], "DONE")

In [5]:
# Open the gulped RGB frames.
# NOTE(review): the variable is named `rgb_train` but the path points at `rgb_test`.
rgb_train = GulpDirectory('../datasets/epic/gulp/rgb/rgb_test/')

# Accumulator filled in place by the extractor: one list for feature arrays,
# one for the matching metadata values.
features_and_meta = dict(features=[], metadata=[])

extractor = FeatureExtractor(backbone_2d=generate_model(), device=device, dtype=dtype)
# Left as the last expression so the cell displays the number of processed segments.
extractor.extract(rgb_train, features_and_meta)

Multi-Scale Temporal Relation Network Module in use ['8-frame relation', '7-frame relation', '6-frame relation', '5-frame relation', '4-frame relation', '3-frame relation', '2-frame relation']


0it [00:00, ?it/s]

P01_01_0 DONE
P01_01_1 DONE
P01_01_10 DONE
P01_01_100 DONE
P01_01_101 DONE
P01_01_102 DONE
P01_01_103 DONE
P01_01_104 DONE
P01_01_105 DONE
P01_01_106 DONE
P01_01_107 DONE
P01_01_108 DONE
P01_01_109 DONE
P01_01_11 DONE
P01_01_110 DONE
P01_01_111 DONE
P01_01_112 DONE
P01_01_113 DONE
P01_01_114 DONE
P01_01_115 DONE
P01_01_116 DONE
P01_01_117 DONE
P01_01_118 DONE
P01_01_12 DONE
P01_01_120 DONE
P01_01_121 DONE
P01_01_122 DONE
P01_01_123 DONE
P01_01_124 DONE
P01_01_125 DONE
P01_01_126 DONE
P01_01_127 DONE
P01_01_128 DONE
P01_01_129 DONE
P01_01_13 DONE
P01_01_130 DONE
P01_01_131 DONE
P01_01_132 DONE
P01_01_133 DONE
P01_01_134 DONE
P01_01_135 DONE
P01_01_136 DONE
P01_01_137 DONE
P01_01_138 DONE
P01_01_139 DONE
P01_01_14 DONE
P01_01_140 DONE
P01_01_141 DONE
P01_01_142 DONE
P01_01_143 DONE
P01_01_144 DONE
P01_01_145 DONE
P01_01_146 DONE
P01_01_147 DONE
P01_01_148 DONE
P01_01_149 DONE
P01_01_15 DONE
P01_01_150 DONE
P01_01_151 DONE
P01_01_152 DONE
P01_01_153 DONE
P01_01_154 DONE
P01_01_155 DONE
P0

4it [02:09, 32.40s/it] 

P01_01_189 DONE





100

In [None]:
# Persist the extracted features and metadata.
# The cell previously dumped `output`, a name that is not defined anywhere in
# the visible notebook; `features_and_meta` is the dict the extraction cell fills.
# NOTE(review): confirm this is the object that was meant to be saved.
with open('p01_01_chunk_1_features.pkl', 'wb') as f:
    pickle.dump(features_and_meta, f)

In [13]:
# Feature arrays accumulated in `features_and_meta` by the extraction cell.
features1 = features_and_meta['features']

# NOTE(review): imported mid-notebook; consider moving it to the import cell at the top.
import cProfile

def numpy_concat(x):
    """Concatenate a sequence of arrays along their first axis.

    Args:
        x: Sequence of array-likes accepted by ``np.concatenate``.

    Returns:
        The concatenated array. The result used to be discarded, which made
        the helper usable only for timing; returning it keeps profiling calls
        working and lets callers reuse the result.
    """
    return np.concatenate(x)

# Profile one concatenation of the collected feature arrays; cProfile prints
# its statistics table to the cell output.
cProfile.run('numpy_concat(features1)')

# allow = np.concatenate(features_and_meta['features'])

         7 function calls in 0.003 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.003    0.003 <__array_function__ internals>:2(concatenate)
        1    0.000    0.000    0.003    0.003 <ipython-input-13-f1de2ff8162e>:5(numpy_concat)
        1    0.000    0.000    0.003    0.003 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 multiarray.py:143(concatenate)
        1    0.000    0.000    0.003    0.003 {built-in method builtins.exec}
        1    0.003    0.003    0.003    0.003 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [11]:

# The metadata list is flat: FeatureExtractor._append adds eight consecutive
# values per segment (narration_id, narration, verb, verb_class, noun,
# noun_class, start_frame, end_frame).
features0 = features_and_meta['metadata']

# First segment's record.
f_1 = features0[0:8]

# Index 7 is the first segment's end_frame, i.e. the frame offset at which the
# next segment starts.
start = f_1[7]

# The trailing `allow[]` was removed: it is a syntax error (empty subscript) on
# a name whose only assignment is commented out, and it stopped the cell from running.
start

'195'