## Feature extraction by interpolation
* Get full keypoints by Videopose3D
* Interpolate to 100 time steps
* 951 non-duplicated videos
* Rescale keypoints to a common scale (1920 * 1080 -> 1280 * 720)

In [None]:
# IPython magic (notebook-only, not plain Python): selects the "plain"
# exception reporting mode.
%xmode plain
from google.colab import drive
# Mount Google Drive at /content/drive; every path used below lives under it.
drive.mount('/content/drive')

import os
import re
import pandas as pd
import numpy as np
from glob import glob

# List the project folder; the result is not stored, but the call raises if
# the folder does not exist.
os.listdir('/content/drive/My Drive/IOT Classification Challenge/')

# Location of the combined raw dataset (DATASET is an alias of PATH).
PATH = '/content/drive/My Drive/IOT Classification Challenge/combined_dataset/'
DATASET = PATH

# Root folder holding the per-video keypoint archives (*.mp4.npz).
path_features = '/content/drive/My Drive/IOT Classification Challenge/Featurized_dataset/'

# Walk the feature tree and gather every "*.mp4.npz" file, directory by
# directory, in os.walk order.
feature_dirs = []
for _dirpath, _subdirs, _filenames in os.walk(path_features):
    feature_dirs.extend(glob(os.path.join(_dirpath, "*.mp4.npz")))

# Sanity check on the expected number of archives.
assert len(feature_dirs) == 977

def extract_trajectories(keypoints, dim = 2, with_index = False):
    """Stack the per-frame keypoints of non-empty frames into one array.

    Parameters
    ----------
    keypoints : iterable of 2-item sequences
        One entry per frame. The first item is ignored; the second item ``k``
        holds the keypoints and is indexed as ``k[0, rows, :]``, so only the
        first entry along its leading axis is used. Frames whose ``k`` is
        empty are skipped.
    dim : int
        ``2`` keeps rows 0 and 1 (x and y, no confidence); ``3`` keeps rows
        0, 1 and 3 (row 3 is presumably a confidence/probability channel --
        confirm against the detector output). Any other value selects
        nothing, so the function returns ``[]``.
    with_index : bool
        When true, also return the positions of the frames that were kept.

    Returns
    -------
    numpy.ndarray, or (numpy.ndarray, list of int), or []
        The stacked array has one leading entry per kept frame. An empty list
        is returned (even when ``with_index`` is true) if no trajectory could
        be extracted, so callers can test ``len(result) == 0``.
    """
    # Pick the rows once. The slice is only evaluated for the requested
    # ``dim``, so 2-D extraction works on arrays that have no row 3.
    if dim == 2:
        rows = [0, 1]
    elif dim == 3:
        rows = [0, 1, 3]
    else:
        rows = None

    trajectory = []
    index = []
    for i, (_, k) in enumerate(keypoints):
        if len(k) == 0:  # nothing detected in this frame
            continue
        index.append(i)
        if rows is not None:
            trajectory.append(k[0, rows, :])

    if len(trajectory) == 0:
        return []  # ignore videos without any extracted trajectory

    stacked = np.stack(trajectory)
    if with_index:
        return stacked, index
    return stacked


def traj_interp(traj, n_frames=100): # interpolation
    """Linearly resample a trajectory to ``n_frames`` time steps and flatten it.

    Parameters
    ----------
    traj : array-like, shape (n_samples, ...)
        Trajectory with time along the first axis, e.g. ``(n_samples, 2, 17)``
        as produced by ``extract_trajectories``. Any trailing shape is
        accepted; every trailing element is interpolated independently.
    n_frames : int
        Number of evenly spaced time steps in the output.

    Returns
    -------
    numpy.ndarray, 1-D
        The resampled trajectory flattened in C order (time step first, then
        the trailing axes), of length ``n_frames * prod(traj.shape[1:])``.

    Raises
    ------
    ValueError
        If the trajectory has no samples.
    """
    # Convert first so that plain lists/tuples are accepted as well.
    traj = np.asarray(traj, dtype=float)
    n_samples = traj.shape[0]
    if n_samples == 0:
        raise ValueError("trajectories of length 0!!")

    # One column per coordinate/joint combination; np.interp is 1-D only.
    channels = traj.reshape(n_samples, -1)
    # Source and destination samples are spread evenly over the same
    # normalised 0..100 time axis.
    dest_x = np.linspace(0, 100, n_frames)
    src_x = np.linspace(0, 100, n_samples)

    result = np.empty((n_frames, channels.shape[1]))
    for c in range(channels.shape[1]):
        result[:, c] = np.interp(dest_x, src_x, channels[:, c])
    return result.reshape(-1)

def get_full_feature_data(feature_dirs, transform=None, **kwargs):
    """Load keypoint archives and build de-duplicated features and labels.

    For every path the video id and the label name are parsed from the path
    itself, the ``'keypoints'`` array is loaded from the ``.npz`` archive and
    turned into a trajectory with ``extract_trajectories``. Only the first
    archive that yields a non-empty trajectory is kept for each video id.

    Parameters
    ----------
    feature_dirs : iterable of str
        Paths of the ``*.mp4.npz`` archives.
    transform : callable, optional
        Applied to every non-empty trajectory as ``transform(traj, **kwargs)``.
    **kwargs
        Extra keyword arguments forwarded to ``transform``.

    Returns
    -------
    video_id_list : list of str
        Ids of the kept videos, in processing order.
    features : list
        One (transformed) trajectory per kept video.
    labels : numpy.ndarray
        Encoded label per kept video. A label name that is missing from the
        encoder is stored as ``None``. The array is empty when no video was
        kept.

    Raises
    ------
    ValueError
        If the video id or the label cannot be parsed from a path.
    """
    label_encoder = {'no_interaction': 0, 'open_close_fridge': 1,
                     'put_back_item': 2, 'screen_interaction': 3, 'take_out_item': 4}

    # Raw strings keep the patterns unchanged while avoiding invalid string
    # escapes; compiled once instead of once per path.
    video_id_re = re.compile(r'(?<=[0-9]\_)[0-9]+?\_[0-9](?=\.mp4|\s2\.mp4)')
    label_re = re.compile(r'(?<=Featurized_dataset\/).+(?=\_[0-9]+\_[0-9]+\_.*?\.mp4\.npz)')

    features = []
    labels = []
    video_id_list = []
    seen_ids = set()  # O(1) duplicate lookup; mirrors video_id_list

    for path in feature_dirs:
        id_match = video_id_re.search(path)
        label_match = label_re.search(path)
        if id_match is None or label_match is None:
            raise ValueError(
                "cannot parse video id / label from path: {!r}".format(path)
            )
        video_id = id_match.group(0)
        if video_id in seen_ids:
            continue  # already kept: skip before paying for the load

        # NOTE(security): allow_pickle=True unpickles object arrays, which can
        # execute arbitrary code -- only load archives from a trusted source.
        with np.load(path, allow_pickle=True) as d:
            keypoints = d['keypoints']

        traj = extract_trajectories(keypoints, with_index=False)
        if len(traj) == 0:
            continue
        if transform:
            traj = transform(traj, **kwargs)
            if len(traj) == 0:
                continue

        seen_ids.add(video_id)
        video_id_list.append(video_id)
        labels.append(label_encoder.get(label_match.group(0), None))
        features.append(traj)

    # np.asarray (unlike np.stack) also handles the case where nothing was kept.
    return video_id_list, features, np.asarray(labels)

# Featurize every archive listed in feature_dirs (the .npz path list),
# resampling each trajectory with traj_interp.
video_id_list, feature_df, label_df = get_full_feature_data(
    feature_dirs,
    transform=traj_interp,
)

# One row per kept video, indexed by video id.
# Original author's shape note: (971,3400) -> remove duplication (951,3400)
feature_df = pd.DataFrame(data=feature_df, index=video_id_list)
# Original author's shape note: (971,) -> remove duplication (951,)
label_df = pd.Series(data=label_df, index=video_id_list)

# Rescale the keypoints according to resolution: a row with any value above
# 1280, or any y value above 720, is treated as a 1920x1080 recording and
# divided by 1.5 to bring it to 1280x720.
#
# Each row is a flattened (frame, coordinate, joint) array with 2 coordinates
# and 17 joints, so within every run of 34 columns the first 17 are x and the
# last 17 are y. The y test therefore has to cover the y columns of *every*
# frame, not only columns 17..33 (the first frame).
_N_JOINTS = 17
_FRAME_WIDTH = 2 * _N_JOINTS
_is_y_col = (np.arange(feature_df.shape[1]) % _FRAME_WIDTH) >= _N_JOINTS
_too_wide = feature_df.max(axis=1) > 1280
_too_tall = feature_df.iloc[:, _is_y_col].max(axis=1) > 720
_needs_rescale = (_too_wide | _too_tall).to_numpy()
if _needs_rescale.any():
    # Positional assignment, one vectorized step instead of a per-row loop.
    feature_df.iloc[_needs_rescale, :] = (
        feature_df.iloc[_needs_rescale, :].to_numpy() / 1.5
    )

# Persist the feature matrix and the labels as CSV files (header row included;
# the video-id index is written as the first column).
for _table, _csv_path in (
    (feature_df, "/content/drive/MyDrive/IOT Classification Challenge/feature_df_951.csv"),
    (label_df, "/content/drive/MyDrive/IOT Classification Challenge/label_df_951.csv"),
):
    _table.to_csv(_csv_path, header=True)