In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
import pathlib
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.interpolate import interp1d
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [16]:
import sys
sys.path.append('../..')
import lared.accel.constants.constants as const
from jose.accel.preproc import interpolate
from lared_dataset.constants import (
    raw_accel_path,
    processed_accel_path
)

In [4]:
# All raw accelerometer inputs live under `raw_accel_path`.
base_path = pathlib.Path(raw_accel_path)
# CSV read with np.loadtxt to build the node lookup.
MAPPING_FILE = base_path.joinpath("mapping.csv")
# Pickled table that is read into `master_df`.
MASTER_PICKLE_PATH = base_path.joinpath("master_data.pkl")
# Pickle holding the valid audio segments; this is a relative path, so it is
# resolved against the current working directory when opened.
VALID_AUDIO_SEGMENTS_PATH = "../valid_audio_segments.pkl"

# Interpolation

In [5]:
# Anchor points pairing a video frame index with a (fractional) accelerometer
# frame. NOTE(review): presumably each balloon pop is an event located in both
# recordings — confirm how these values were measured.
balloon_pop_1_video_frame = 23030
balloon_pop_1_accel_frame = 45977 + 19/34

# Pop 2 is recorded here but is not used to fit the interpolators below.
balloon_pop_2_video_frame = 74844
balloon_pop_2_accel_frame = 47706 + 23/28

balloon_pop_3_video_frame = 166836.5
balloon_pop_3_accel_frame = 50776 + 30.5/32

# Divisor that converts a video frame index into video seconds.
VIDEO_FPS = 29.97

# Both interpolators are straight lines through pops 1 and 3 and extrapolate
# linearly outside that interval.
sync_video_frames = [balloon_pop_1_video_frame, balloon_pop_3_video_frame]
sync_accel_frames = [balloon_pop_1_accel_frame, balloon_pop_3_accel_frame]

# video frame index -> accelerometer frame
frame_to_accel = interp1d(
    sync_video_frames,
    sync_accel_frames,
    fill_value="extrapolate",
)
# video time in seconds -> accelerometer frame
video_seconds_to_accel_sample = interp1d(
    [frame / VIDEO_FPS for frame in sync_video_frames],
    sync_accel_frames,
    fill_value="extrapolate",
)

In [6]:
# Load the two-column integer table from MAPPING_FILE and build a lookup keyed
# by column 1 (matched against the 'Node' group keys further down) whose values
# come from column 0 (used as the pid).
mapping_arr = np.loadtxt(MAPPING_FILE, delimiter=',', dtype=int)
node_ids, pids = mapping_arr[:, 1], mapping_arr[:, 0]
mapping = {node_id: pid for node_id, pid in zip(node_ids, pids)}

In [7]:
# Load the master accelerometer table. It is grouped by 'Node' further down,
# and its 'Frame_No', 'x', 'y' and 'z' columns are read per group.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
master_df = pd.read_pickle(str(MASTER_PICKLE_PATH))

In [8]:
# Read the pickled valid audio segments. The file is opened in a `with` block
# so the handle is closed deterministically.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted files.
with open(VALID_AUDIO_SEGMENTS_PATH, 'rb') as seg_file:
    valid_seg = pickle.load(seg_file)

# Keep only the second item of every pickled entry; each of those is indexed
# below as (pid, start, end).
valid_seg = [el[1] for el in valid_seg]

# pid -> (start, end) of the valid segment on the accelerometer time axis.
# NOTE(review): start/end are divided by 1000 before being passed to the
# seconds-based interpolator — presumably they are stored in milliseconds; confirm.
pid_to_valid_seg = {
    el[0]: (
        video_seconds_to_accel_sample(el[1] / 1000).item(),
        video_seconds_to_accel_sample(el[2] / 1000).item(),
    )
    for el in valid_seg
}

# Extracting accel per subject
Build a mapping from each subject's pid to that subject's accelerometer samples.

In [9]:
def interpolate(accel, step=0.05):
    """Resample time-stamped samples onto a regular grid by linear interpolation.

    NOTE: this definition shadows the ``interpolate`` imported from
    ``jose.accel.preproc`` earlier in the notebook.

    Args:
        accel: 2-D array whose column 0 holds the timestamps and whose
            remaining columns hold the values to interpolate.
        step: spacing of the output grid, in the units of column 0.
            Defaults to 0.05.

    Returns:
        2-D array with the same number of columns as ``accel``. Column 0 is
        ``np.arange(accel[0, 0], accel[-2, 0], step)`` and the other columns
        are the linearly interpolated values at those grid points.

    Raises:
        ValueError: if ``accel`` is not 2-D or has fewer than two rows, or
            (raised by ``interp1d``) if a grid point falls outside the range
            of the timestamps.
    """
    accel = np.asarray(accel)
    if accel.ndim != 2 or accel.shape[0] < 2:
        raise ValueError(
            'accel must be a 2-D array with at least two rows, got shape {}'.format(accel.shape)
        )

    timestamps = accel[:, 0]
    # Diagnostic only: interp1d sorts its inputs itself, so processing continues.
    if not np.all(np.diff(timestamps) >= 0):
        print('not in order')

    f = interp1d(timestamps, accel[:, 1:], axis=0)

    # The grid deliberately stops before the second-to-last timestamp, as in
    # the original implementation. NOTE(review): presumably this keeps any
    # floating-point overshoot of np.arange inside the interpolation range.
    x = np.arange(accel[0, 0], accel[-2, 0], step)

    return np.hstack([x[:, None], f(x)])

In [11]:
# Chalcedony frames are 1s, with 20 samples each
SAMPLES_PER_FRAME = 20
SAMPLE_PERIOD = 0.05  # spacing between samples, in frame units (1 / 20)


def assert_len(x):
    """Check that a single frame holds exactly SAMPLES_PER_FRAME samples."""
    assert len(x) == SAMPLES_PER_FRAME, \
        'expected {:d} samples per frame, got {:d}'.format(SAMPLES_PER_FRAME, len(x))


# Build, per pid, the normalized samples inside the subject's valid segment
# (subj_accel) and the same data resampled by interpolate() (subj_accel_interp).
# Each stored array has the columns [t, x, y, z].
grouped_df = master_df.groupby('Node')
subj_accel = dict()
subj_accel_interp = {}
for name, group in grouped_df:

    # Midge was not used in the data collection
    if name not in mapping:
        print('{:d} not in mapping'.format(name))
        continue

    pid = mapping[name]

    # There was no valid data for this Midge
    # Possibly because it was worn very shortly or
    # not during mingling
    if pid not in pid_to_valid_seg:
        print('{:d} not in pid_to_valid_seg'.format(pid))
        continue
    subj_valid_seg = pid_to_valid_seg[pid]

    # Filter device that failed during data collection
    if name in const.FAILED_ACCEL:
        print('failed accel for pid {:d}'.format(pid))
        continue

    frame_nums = group['Frame_No'].to_numpy()
    if len(frame_nums) < 2:
        # With fewer than two frames the diff array is empty and np.argmax
        # would raise; skip the device like the other unusable ones.
        print('fewer than 2 frames for pid {:d}'.format(pid))
        continue
    diffs = np.diff(frame_nums)
    first_idx = np.argmax(diffs) # find when the sync jump occurs

    for axis in ('x', 'y', 'z'):
        group[axis].apply(assert_len)

    # One timestamp per sample: every frame number is expanded into
    # SAMPLES_PER_FRAME values spaced SAMPLE_PERIOD apart.
    t = np.concatenate([np.arange(t, t+1, SAMPLE_PERIOD) for t in frame_nums])
    x = np.concatenate(group['x'].tolist())
    y = np.concatenate(group['y'].tolist())
    z = np.concatenate(group['z'].tolist())

    # Drop every frame up to and including the one right before the largest
    # jump in frame numbers.
    accel = np.hstack([t[:,None],x[:,None],y[:,None],z[:,None]])[(first_idx+1)*SAMPLES_PER_FRAME:,:]
    accel = accel[(accel[:,0] > subj_valid_seg[0]) & (accel[:,0] < subj_valid_seg[1]), :] # filter out accel out of valid segment

    if len(accel) < 2:
        # StandardScaler rejects empty input and interpolate() indexes the
        # second-to-last row, so nothing useful can be produced here.
        print('fewer than 2 accel samples in valid segment for pid {:d}'.format(pid))
        continue

    # Normalization per subject
    accel[:,1:] = StandardScaler().fit_transform(accel[:,1:])

    # Interpolation to fill in any holes in the data.
    # NOTE(review): existing samples are reproduced only up to floating-point
    # rounding of the resampling grid (compare the two displayed arrays for pid 10).
    interp_accel = interpolate(accel)

    subj_accel[pid] = accel
    subj_accel_interp[pid] = interp_accel

2 not in mapping
failed accel for pid 21
8 not in pid_to_valid_seg
failed accel for pid 9
16 not in mapping
18 not in mapping
23 not in mapping
26 not in mapping
29 not in mapping
39 not in pid_to_valid_seg
49 not in mapping
failed accel for pid 1
52 not in mapping
54 not in mapping
56 not in mapping
57 not in mapping
64 not in mapping
38 not in pid_to_valid_seg
73 not in mapping
76 not in mapping
78 not in mapping
40 not in pid_to_valid_seg
37 not in pid_to_valid_seg
84 not in mapping
91 not in mapping
36 not in pid_to_valid_seg
107 not in mapping


In [17]:
# Persist the interpolated per-pid arrays. The `with` block guarantees the
# file is flushed and closed once the dump finishes.
with open(os.path.join(processed_accel_path, 'subj_accel_interp.pkl'), 'wb') as out_file:
    pickle.dump(subj_accel_interp, out_file)

In [13]:
# Normalized, non-interpolated samples stored for pid 10; columns are [t, x, y, z].
subj_accel[10]

array([[ 4.52090000e+04, -1.24252884e-01, -2.28880847e+00,
        -2.26937107e+00],
       [ 4.52090500e+04, -1.42610644e-01, -2.28880847e+00,
        -2.08869005e+00],
       [ 4.52091000e+04, -1.42610644e-01, -2.00305177e+00,
        -2.08869005e+00],
       ...,
       [ 5.08208500e+04, -6.12569281e-01,  5.68758467e-01,
         6.32153493e-01],
       [ 5.08209000e+04, -8.47548600e-01,  8.54515161e-01,
         6.32153493e-01],
       [ 5.08209500e+04, -8.47548600e-01,  5.68758467e-01,
         4.50763923e-01]])

In [15]:
# The same pid after interpolate(): samples resampled on a regular 0.05 grid.
subj_accel_interp[10]

array([[ 4.52090000e+04, -1.24252884e-01, -2.28880847e+00,
        -2.26937107e+00],
       [ 4.52090500e+04, -1.42610644e-01, -2.28880847e+00,
        -2.08869005e+00],
       [ 4.52091000e+04, -1.42610644e-01, -2.00305177e+00,
        -2.08869005e+00],
       ...,
       [ 5.08208000e+04, -6.12569281e-01,  5.68758467e-01,
         4.50765108e-01],
       [ 5.08208500e+04, -6.12570816e-01,  5.68760334e-01,
         6.32153493e-01],
       [ 5.08209000e+04, -8.47548600e-01,  8.54513294e-01,
         6.32152308e-01]])