In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import pickle
import pathlib
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.interpolate import interp1d
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [4]:
import sys
sys.path.append('../..')
import lared.accel.constants.constants as const
from lared.constants.laughter_constants import data_path, cloud_data_path
from jose.accel.preproc import interpolate
from lared_laughter.constants import datasets_path

In [5]:
# Display the cloud data directory imported from lared.constants.laughter_constants
# as a sanity check of the environment.
cloud_data_path

'/mnt/c/Users/Jose/gdrive/data/lared_laughter'

In [6]:
# Root directory of the local dataset. Defaults to the original hard-coded
# location; set the LARED_DATA_ROOT environment variable to run the notebook
# on a machine where the data lives somewhere else.
DATA_ROOT = pathlib.Path(os.environ.get("LARED_DATA_ROOT", "/mnt/e/data/lared"))
# Accelerometer files: the node/participant mapping and the master pickle.
CHALCEDONY_ROOT = DATA_ROOT / "accel"
MAPPING_FILE    = CHALCEDONY_ROOT / "mapping.csv"
MASTER_PICKLE_PATH = CHALCEDONY_ROOT / "master_data.pkl"
# Pickle with the valid audio segments, resolved relative to the notebook's
# working directory.
VALID_AUDIO_SEGMENTS_PATH = "./valid_audio_segments.pkl"

# Path of the human accelerometer dataset inside the cloud data directory.
accel_ds_path = os.path.join(cloud_data_path, 'accel', 'accel_ds_human.pkl')

# Interpolation

In [7]:
# Frame rate used to convert video seconds into video frame numbers.
VIDEO_FPS = 29.97

# Reference events located in both the video (frame number) and the
# accelerometer stream (accel frame number, with a fractional offset).
# NOTE(review): presumably these are balloon pops used for synchronization and
# the fractions are sub-frame positions read off manually -- confirm.
balloon_pop_1_video_frame = 23030
balloon_pop_1_accel_frame = 45977 + 19/34

# Pop 2 is not used in the linear fit below; it is kept as an independent
# point that can be used to check the fit.
balloon_pop_2_video_frame = 74844
balloon_pop_2_accel_frame = 47706 + 23/28

balloon_pop_3_video_frame = 166836.5
balloon_pop_3_accel_frame = 50776 + 30.5/32

# The fit is the straight line through pops 1 and 3, extrapolated outside them.
_sync_video_frames = [balloon_pop_1_video_frame, balloon_pop_3_video_frame]
_sync_accel_frames = [balloon_pop_1_accel_frame, balloon_pop_3_accel_frame]

# Video frame number -> accel frame number.
frame_to_accel = interp1d(_sync_video_frames, _sync_accel_frames, fill_value="extrapolate")
# Video time in seconds -> accel frame number.
video_seconds_to_accel_sample = interp1d(
    [frame / VIDEO_FPS for frame in _sync_video_frames],
    _sync_accel_frames,
    fill_value="extrapolate",
)

In [8]:
# Read the two-column integer mapping file and build a lookup keyed by the
# second column, returning the first column. Further down, the lookup is
# indexed with the accelerometer 'Node' value and yields the participant ID.
mapping_arr = np.loadtxt(MAPPING_FILE, delimiter=',', dtype=int)
vals, keys = mapping_arr[:, 0], mapping_arr[:, 1]
mapping = {key: val for key, val in zip(keys, vals)}

In [9]:
# Load the master accelerometer DataFrame from its pickle file.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
master_df = pd.read_pickle(str(MASTER_PICKLE_PATH))

In [10]:
# Load the valid audio segments. The file is opened in a context manager so the
# handle is closed deterministically.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(VALID_AUDIO_SEGMENTS_PATH, 'rb') as seg_file:
    valid_seg = pickle.load(seg_file)

# Keep only the second item of every loaded entry; it is indexed below as
# (pid, start, end).
valid_seg = [el[1] for el in valid_seg]

# Map each participant ID to its (start, end) valid range expressed in accel
# frame numbers. Start/end are divided by 1000 before conversion.
# NOTE(review): presumably they are stored in milliseconds of video time -- confirm.
pid_to_valid_seg = {
    el[0]: (
        video_seconds_to_accel_sample(el[1]/1000).item(),
        video_seconds_to_accel_sample(el[2]/1000).item(),
    )
    for el in valid_seg
}

# Extracting accel per subject
Mapping from pid to accel

In [11]:
def interpolate(accel, step=0.05):
    """Resample an accelerometer array onto a regular time grid.

    This notebook-level definition replaces the ``interpolate`` name imported
    from ``jose.accel.preproc`` at the top of the notebook.

    Parameters
    ----------
    accel : np.ndarray
        2-D array whose first column holds the sample times and whose
        remaining columns hold the values to interpolate. It needs at least
        two rows, because the grid's upper bound is read from the
        second-to-last row.
    step : float, optional
        Spacing of the output time grid. Defaults to 0.05, the value that was
        previously hard-coded.

    Returns
    -------
    np.ndarray
        Array with the regular time grid in column 0 and the linearly
        interpolated values in the remaining columns. The grid starts at the
        first time and stops before the second-to-last time.
    """
    f = interp1d(accel[:, 0], accel[:, 1:], axis=0)

    # Only a warning: interp1d sorts its inputs itself, but the grid bounds
    # below are taken from the array positions, not from the min/max times.
    if not np.all(np.diff(accel[:, 0].squeeze()) >= 0):
        print('not in order')

    x = np.arange(accel[0, 0], accel[-2, 0], step)

    return np.hstack([x[:, None], f(x)])

In [12]:
def assert_len(x):
    """Assert that one row's sample list holds exactly 20 values."""
    assert len(x) == 20


# Build, for every usable accelerometer node, the normalized raw signal
# (subj_accel) and its regularly resampled version (subj_accel_interp),
# both keyed by participant ID.
grouped_df = master_df.groupby('Node')
subj_accel = dict()
subj_accel_interp = {}
for name, group in grouped_df:

    # Nodes without an entry in the mapping cannot be assigned to a participant.
    if name not in mapping:
        print('{:d} not in mapping'.format(name))
        continue

    pid = mapping[name]

    # Participants without a valid segment are skipped silently.
    if pid not in pid_to_valid_seg:
        continue
    subj_valid_seg = pid_to_valid_seg[pid]

    # Nodes listed as failed in the project constants are skipped silently.
    if name in const.FAILED_ACCEL:
        continue

    frame_nums = group['Frame_No'].to_numpy()
    diffs = np.diff(frame_nums)
    first_idx = np.argmax(diffs) # find when the sync jump occurs

    # Every row must carry exactly 20 samples per axis; the time axis built
    # below relies on it.
    group['x'].apply(assert_len)
    group['y'].apply(assert_len)
    group['z'].apply(assert_len)

    # Spread each frame number over 20 sub-frame timestamps (step 0.05) and
    # flatten the per-row sample lists so all four arrays line up.
    t = np.concatenate([np.arange(t, t+1, 0.05) for t in frame_nums])
    x = np.concatenate(group['x'].tolist())
    y = np.concatenate(group['y'].tolist())
    z = np.concatenate(group['z'].tolist())

    # Drop every row up to and including the one before the largest
    # frame-number jump (20 samples per row).
    accel = np.hstack([t[:,None],x[:,None],y[:,None],z[:,None]])[(first_idx+1)*20:,:]
    accel = accel[(accel[:,0] > subj_valid_seg[0]) & (accel[:,0] < subj_valid_seg[1]), :] # filter out accel out of valid segment

    # StandardScaler rejects empty input and interpolate() reads the
    # second-to-last row, so fewer than two samples cannot be processed.
    if len(accel) < 2:
        print('pid {:d}: fewer than 2 accel samples in valid segment, skipping'.format(pid))
        continue

    # Normalization per subject
    accel[:,1:] = StandardScaler().fit_transform(accel[:,1:])

    interp_accel = interpolate(accel)
    subj_accel[pid] = accel
    subj_accel_interp[pid] = interp_accel

2 not in mapping
16 not in mapping
18 not in mapping
23 not in mapping
26 not in mapping
29 not in mapping
49 not in mapping
52 not in mapping
54 not in mapping
56 not in mapping
57 not in mapping
64 not in mapping
73 not in mapping
76 not in mapping
78 not in mapping
84 not in mapping
91 not in mapping
107 not in mapping


# Extracting accel for examples

In [13]:
# Read the annotation results and collapse them to one row per example hash,
# keeping the first value seen for each retained column. 'hash' is kept as a
# regular column in addition to being the group index.
examples = (
    pd.read_csv('../annotation/analysis/annotation_results.csv')
    .groupby('hash')
    .agg(dict.fromkeys(
        ['hash', 'cam', 'person', 'ini_time', 'end_time', '_ini_time', '_end_time'],
        'first',
    ))
)

In [14]:
# Preview the first rows of the '_ini_time' / '_end_time' columns.
examples[['_ini_time', '_end_time']].head()

Unnamed: 0_level_0,_ini_time,_end_time
hash,Unnamed: 1_level_1,Unnamed: 2_level_1
002043005fa746f076c845dc38dd1bd97327bde09e17fcb953102c7ff0b277ac,7960.60239,7968.53386
006f74addfc99845bf6c9f80d13d52ccc189341031525530762bb83dd8b713af,8287.730008,8295.177193
0070b1ba6e8de828aea5c6d3b9d1c3662959fb5f032a679cac78e5ca98fcb1ff,1994.302349,2002.112154
00bcb7cfe796c64cf5fa4195248553f4a5937897650cee2e2bc2d80fba782667,3697.597087,3704.53656
0160da1890b3fc4a923586e460bb781ada3bc56ea9908aea299ce3828a629c42,3526.003181,3532.905657


In [15]:
def extract_accel(ex, ini_key = '_ini_time', end_key='_end_time', delta=1):
    """Cut the accelerometer segment that corresponds to one example.

    Parameters
    ----------
    ex : dict
        Example record. Reads ``ex['cam']``, ``ex['person']`` and the two time
        entries selected by ``ini_key`` / ``end_key``; the times are passed to
        ``video_seconds_to_accel_sample``.
    ini_key, end_key : str
        Keys of the start and end time inside ``ex``.
    delta : int
        Extra samples added to the end index.

    Returns
    -------
    np.ndarray or None
        The rows of the subject's interpolated signal inside the requested
        window, without the time column. ``None`` when the person has no
        interpolated accelerometer signal. An empty array when the window
        starts after the last available sample.

    Notes
    -----
    Indices are clamped at zero after the sample correction is applied.
    Without the clamp, a window near the start of the signal would produce
    negative indices, which NumPy interprets as counting from the end and
    returns an unrelated (usually empty) slice.
    """
    correction = -14 # accel error, obtained from visual check

    # cam 2 is ahead by 0.25s (5 samples @ 20Hz)
    cam2_correction = 5 if ex['cam'] == 2 else 0

    if ex['person'] not in subj_accel_interp:
        return None

    my_subj_accel = subj_accel_interp[ex['person']]
    times = my_subj_accel[:, 0]
    n_samples = len(times)

    accel_ini = float(video_seconds_to_accel_sample(ex[ini_key]))
    accel_fin = float(video_seconds_to_accel_sample(ex[end_key]))

    def first_sample_after(instant):
        # Index of the first sample later than `instant`; n_samples when there
        # is none (np.argmax alone would return 0 in that case).
        later = times > instant
        return int(np.argmax(later)) if later.any() else n_samples

    ini_idx = first_sample_after(accel_ini)
    fin_idx = first_sample_after(accel_fin) + delta

    if ini_idx == 0 or ini_idx == n_samples:
        print('out of bounds. pid={:d}, accel_ini={:.2f}'.format(ex['person'], accel_ini))
    if ini_idx == n_samples:
        # The window starts after the end of the signal: nothing to return.
        return my_subj_accel[n_samples:, 1:]

    shift = correction + cam2_correction
    # Clamp so a negative index never wraps around to the end of the array.
    ini_idx = max(ini_idx + shift, 0)
    fin_idx = max(fin_idx + shift, 0)

    return my_subj_accel[ini_idx: fin_idx, 1:]

In [16]:
def get_all_accel(ini_key = '_ini_time', end_key='_end_time'):
    """Extract the accelerometer segment of every row in ``examples``.

    Each row is converted to a dict and handed to ``extract_accel`` together
    with the two time keys. Rows for which ``extract_accel`` returns ``None``
    are left out.

    Returns
    -------
    dict
        Maps each example's ``'hash'`` value to its extracted segment.
    """
    extracted = {}
    for _, row in tqdm(examples.iterrows()):
        record = row.to_dict()
        segment = extract_accel(record, ini_key, end_key)
        if segment is None:
            continue
        extracted[record['hash']] = segment
    return extracted

In [17]:
# Extract segments using the '_ini_time' / '_end_time' columns and store them.
accel_long = get_all_accel('_ini_time', '_end_time')
# Write inside a context manager so the file is flushed and closed deterministically.
with open(os.path.join(datasets_path, 'tight', 'accel_long.pkl'), 'wb') as out_file:
    pickle.dump(accel_long, out_file)

510it [00:00, 3074.82it/s]


In [18]:
# Extract segments using the 'ini_time' / 'end_time' columns and store them.
accel_short = get_all_accel('ini_time', 'end_time')
# Write inside a context manager so the file is flushed and closed deterministically.
with open(os.path.join(datasets_path, 'tight', 'accel_short.pkl'), 'wb') as out_file:
    pickle.dump(accel_short, out_file)

510it [00:00, 3003.60it/s]
