In [109]:
import os
import glob
import pandas as pd
from tqdm.auto import tqdm
import time
import numpy as np
import numpy.ma as ma

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop


In [110]:
actions = ['1', '2', '3', '4-1', '4-2', '5', '6', '7']
subject = range(1,31)
headers = {
    'act': ['times', 'X', 'Y', 'Z'],
    'acw': ['times', 'X', 'Y', 'Z'],
    'dc': ['times'],
    'pm': ['times']
}
for i in range(1,513):
    headers['pm'].append(i)
for i in range(1,193):
    headers['dc'].append(i)
def get_subject_action(sensor, full_path):
    index = full_path.find(sensor)
    index += len(sensor)
    subject = int(full_path[index+1:index+3])
    action = int(full_path[index+4:index+6])
    if action == 4:
        if full_path.find(f"{sensor}_1") > 0:
            action = '4-1'
        else:
            action = '4-2'
        
    return (subject, str(action))

## load the act data

In [111]:
df_act = None
for index, folder in enumerate(glob.iglob(f"data\\act\\*")):
    for index_1, file in enumerate(os.listdir(folder)):
        full_path = f"{folder}\\{file}"
        df_tmp = pd.read_csv(full_path, names=headers['act'])
        subject, action = get_subject_action('act', full_path)
        df_tmp['subject'] = subject
        df_tmp['action'] = action
        if df_act is None:
            df_act = df_tmp
        else:
            df_act = pd.concat([df_act, df_tmp])
            
df_acw = None
for index, folder in enumerate(glob.iglob(f"data\\acw\\*")):
    for index_1, file in enumerate(os.listdir(folder)):
        full_path = f"{folder}\\{file}"
        df_tmp = pd.read_csv(full_path, names=headers['acw'])
        subject, action = get_subject_action('acw', full_path)
        df_tmp['subject'] = subject
        df_tmp['action'] = action
        if df_acw is None:
            df_acw = df_tmp
        else:
            df_acw = pd.concat([df_acw, df_tmp])

In [112]:
print(len(df_act))
print(len(df_acw))

1400856
1313695


In [113]:
%%time
formated_data = {}
formated_data['accerometor'] = np.empty(shape=(0,9))
for subject in df_act['subject'].unique():
    for action in df_act['action'].unique():
        df_wrist = df_acw[(df_acw['subject'] == subject) & (df_acw['action'] == action )]
        df_thigh = df_act[(df_act['subject'] == subject) & (df_act['action'] == action)]
        if(len(df_wrist) < len(df_thigh)):
            df_thigh = df_thigh.head(len(df_wrist))
        else:
            df_wrist = df_wrist.head(len(df_thigh))
        
        arr = df_wrist[['times','action','subject']].to_numpy()
        arr = np.insert(arr,arr.shape[1], df_wrist['X'].to_numpy(), axis=1)
        arr = np.insert(arr,arr.shape[1], df_wrist['Y'].to_numpy(), axis=1)
        arr = np.insert(arr,arr.shape[1], df_wrist['Z'].to_numpy(), axis=1)
        arr = np.insert(arr,arr.shape[1], df_thigh['X'].to_numpy(), axis=1)
        arr = np.insert(arr,arr.shape[1], df_thigh['Y'].to_numpy(), axis=1)
        arr = np.insert(arr,arr.shape[1], df_thigh['Z'].to_numpy(), axis=1)
        formated_data['accerometor'] = np.append(formated_data['accerometor'], arr, axis=0)


Wall time: 59.6 s


In [114]:
formated_data['accerometor']

array([['2018-11-08 11:34:51.474000', '1', 1, ..., -0.546875, -0.828125,
        -0.109375],
       ['2018-11-08 11:34:51.485000', '1', 1, ..., -0.53125, -0.828125,
        -0.109375],
       ['2018-11-08 11:34:51.495000', '1', 1, ..., -0.53125, -0.828125,
        -0.109375],
       ...,
       ['2019-03-26 16:45:51.696000', '7', 30, ..., -1.015625, 0.046875,
        -0.109375],
       ['2019-03-26 16:45:51.706000', '7', 30, ..., -1.015625, 0.015625,
        -0.109375],
       ['2019-03-26 16:45:51.717000', '7', 30, ..., -0.984375, 0.0,
        -0.09375]], dtype=object)

In [115]:
formated_data['accerometor'] = pd.DataFrame(data=formated_data['accerometor'], index=None, columns=
                                           ['times', 'action','subject','wrist_X','wrist_Y',
                                            'wirst_Z','thigh_X','thigh_Y','thigh_Z'])
formated_data['accerometor']

Unnamed: 0,times,action,subject,wrist_X,wrist_Y,wirst_Z,thigh_X,thigh_Y,thigh_Z
0,2018-11-08 11:34:51.474000,1,1,0.125,-0.046875,0.90625,-0.546875,-0.828125,-0.109375
1,2018-11-08 11:34:51.485000,1,1,0.109375,-0.0625,0.90625,-0.53125,-0.828125,-0.109375
2,2018-11-08 11:34:51.495000,1,1,0.109375,-0.0625,0.90625,-0.53125,-0.828125,-0.109375
3,2018-11-08 11:34:51.505000,1,1,0.125,-0.0625,0.90625,-0.53125,-0.84375,-0.109375
4,2018-11-08 11:34:51.516000,1,1,0.125,-0.0625,0.90625,-0.515625,-0.84375,-0.109375
...,...,...,...,...,...,...,...,...,...
1313690,2019-03-26 16:45:51.675000,7,30,-0.296875,-0.640625,0.59375,-1.0,0.015625,-0.09375
1313691,2019-03-26 16:45:51.685000,7,30,-0.296875,-0.640625,0.59375,-1.015625,0.046875,-0.140625
1313692,2019-03-26 16:45:51.696000,7,30,-0.3125,-0.640625,0.59375,-1.015625,0.046875,-0.109375
1313693,2019-03-26 16:45:51.706000,7,30,-0.3125,-0.625,0.59375,-1.015625,0.015625,-0.109375


In [121]:
%%time
df_dc = None
for index, folder in enumerate(glob.iglob(f"data\\dc\\*")):
    for index_1, file in enumerate(os.listdir(folder)):
        full_path = f"{folder}\\{file}"
        df_tmp = pd.read_csv(full_path, names=headers['dc'])
        subject, action = get_subject_action('dc', full_path)
        df_tmp['subject'] = subject
        df_tmp['action'] = action
        
        if df_dc is None:
            df_dc = df_tmp
        else:
            df_dc = pd.concat([df_dc, df_tmp])

Wall time: 14.8 s


In [123]:
image_headers_dc = range(1,193)
df_dc


Unnamed: 0,times,1,2,3,4,5,6,7,8,9,...,185,186,187,188,189,190,191,192,subject,action
0,2018-11-08 11:34:51.538000,0.4583,0.9214,0.9221,0.9167,0.9136,0.9136,0.9106,0.9075,0.9052,...,0.9429,0.6804,0.936,0.9360,0.9360,0.9329,0.9260,0.9279,1,1
1,2018-11-08 11:34:51.632000,0.4583,0.9214,0.9214,0.9167,0.9144,0.9136,0.9106,0.9075,0.9060,...,0.9429,0.9150,0.936,0.9360,0.9360,0.9352,0.9260,0.9279,1,1
2,2018-11-08 11:34:51.804000,0.4583,0.9214,0.9214,0.9167,0.9136,0.9121,0.9121,0.9075,0.9060,...,0.9429,0.9165,0.936,0.9360,0.9360,0.9329,0.9260,0.9260,1,1
3,2018-11-08 11:34:51.977000,0.4591,0.9214,0.9206,0.9167,0.9136,0.9128,0.9106,0.9075,0.9060,...,0.9429,0.9150,0.936,0.9360,0.9345,0.9329,0.9260,0.9260,1,1
4,2018-11-08 11:34:52.133000,0.4591,0.9214,0.9198,0.9167,0.9144,0.9136,0.9106,0.9075,0.9060,...,0.9429,0.6804,0.936,0.9360,0.9360,0.9329,0.9260,0.9260,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,2019-03-26 16:45:50.642000,0.4691,0.9352,0.9314,0.9284,0.9246,0.9216,0.9198,0.9171,0.9148,...,0.9488,0.9331,0.942,0.9382,0.9352,0.9333,0.9284,0.9274,30,7
621,2019-03-26 16:45:50.752000,0.4700,0.9352,0.9314,0.9284,0.9246,0.9216,0.9207,0.9171,0.9148,...,0.9488,0.9350,0.942,0.9382,0.9352,0.9352,0.9284,0.9255,30,7
622,2019-03-26 16:45:50.814000,0.4691,0.9352,0.9314,0.9284,0.9246,0.9216,0.9188,0.9171,0.9148,...,0.9478,0.9375,0.942,0.9382,0.9352,0.9352,0.9284,0.9265,30,7
623,2019-03-26 16:45:50.877000,0.4691,0.9352,0.9314,0.9284,0.9246,0.9224,0.9198,0.9171,0.9148,...,0.9488,0.4700,0.941,0.9382,0.9352,0.9343,0.9284,0.9255,30,7


In [124]:
%%time
df_pm = None
for index, folder in enumerate(glob.iglob(f"data\\pm\\*")):
    for index_1, file in enumerate(os.listdir(folder)):
        full_path = f"{folder}\\{file}"
        df_tmp = pd.read_csv(full_path, names=headers['pm'])
        subject, action = get_subject_action('pm', full_path)
        df_tmp['subject'] = subject
        df_tmp['action'] = action
        
        if df_pm is None:
            df_pm = df_tmp
        else:
            df_pm = pd.concat([df_pm, df_tmp])


Wall time: 45.2 s


In [125]:
df_pm.head()

Unnamed: 0,times,1,2,3,4,5,6,7,8,9,...,505,506,507,508,509,510,511,512,subject,action
0,2018-11-08 11:34:51.468000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,68.0,77.0,55.0,193.0,387.0,331.0,125.0,6.0,1,1
1,2018-11-08 11:34:51.535000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,58.0,78.0,53.0,192.0,388.0,330.0,123.0,6.0,1,1
2,2018-11-08 11:34:51.602000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,64.0,78.0,53.0,195.0,390.0,330.0,119.0,7.0,1,1
3,2018-11-08 11:34:51.669000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,66.0,79.0,55.0,196.0,391.0,324.0,106.0,5.0,1,1
4,2018-11-08 11:34:51.737000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,64.0,79.0,55.0,194.0,391.0,321.0,114.0,6.0,1,1


In [126]:
#create the columns array 
columns_cameras = ['times', 'action','subject']
col_depth = []
col_mat = []
for i in range (1,193):
    col_depth.append('depth_' + str(i))
col_mat
for i in range (1,513):
    col_mat.append('mat_' + str(i))

In [127]:
df_pm

Unnamed: 0,times,1,2,3,4,5,6,7,8,9,...,505,506,507,508,509,510,511,512,subject,action
0,2018-11-08 11:34:51.468000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,68.0,77.0,55.0,193.0,387.0,331.0,125.0,6.0,1,1
1,2018-11-08 11:34:51.535000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,58.0,78.0,53.0,192.0,388.0,330.0,123.0,6.0,1,1
2,2018-11-08 11:34:51.602000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,64.0,78.0,53.0,195.0,390.0,330.0,119.0,7.0,1,1
3,2018-11-08 11:34:51.669000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,66.0,79.0,55.0,196.0,391.0,324.0,106.0,5.0,1,1
4,2018-11-08 11:34:51.737000,20.0,3.0,2.0,0.0,0.0,0.0,72.0,1493.0,1949.0,...,64.0,79.0,55.0,194.0,391.0,321.0,114.0,6.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
923,2019-03-26 16:45:51.454000,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30,7
924,2019-03-26 16:45:51.522000,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30,7
925,2019-03-26 16:45:51.589000,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30,7
926,2019-03-26 16:45:51.656000,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30,7


In [128]:
%%time
arr_pm = df_pm.to_numpy()
arr_dc = df_dc.to_numpy()
df_camera_formated = None
camera_data = np.empty(shape=(0,707))
for subject in df_pm['subject'].unique():
    for action in df_pm['action'].unique():
        arr_pm_tmp = arr_pm[arr_pm[:,arr_pm.shape[1] - 1] == action]
        arr_pm_tmp = arr_pm[arr_pm[:,arr_pm.shape[1] - 2] == subject]
        arr_dc_tmp = arr_dc[arr_dc[:,arr_dc.shape[1] - 1] == action]
        arr_dc_tmp = arr_dc[arr_dc[:,arr_dc.shape[1] - 2] == subject]
        

        if len(arr_pm_tmp) < len(arr_dc_tmp):
            arr_dc_tmp = arr_dc_tmp[range(0,len(arr_pm_tmp))]
        else:
            arr_pm_tmp = arr_pm_tmp[range(0,len(arr_dc_tmp))]
        arr_timestamps = arr_pm_tmp[:,0]
        arr_actions = arr_pm_tmp[:,arr_pm_tmp.shape[1] -1]
        arr_subjects = arr_pm_tmp[:,arr_pm_tmp.shape[1] -2]
        
        #drop the timestamps and actions and subjects
        arr_pm_tmp = arr_pm_tmp[:,range(1,513)]
        arr_dc_tmp = arr_dc_tmp[:,range(1,193)]
        
        print(arr_timestamps.shape)
        print(arr_actions.shape)
        print(arr_subjects.shape)
        print(arr_pm_tmp.shape)
        print(arr_dc_tmp.shape)of course 
        
        if df_camera_formated is None:
            df_times =  pd.DataFrame(data=arr_timestamps, index=None, columns=['times'])
            df_subjects =  pd.DataFrame(data=arr_subjects, index=None, columns=['subject'])
            df_actions =  pd.DataFrame(data=arr_actions, index=None, columns=['action'])
            df_pm_tmp =  pd.DataFrame(data=arr_pm_tmp, index=None, columns=col_mat)
            df_dc_tmp =  pd.DataFrame(data=arr_dc_tmp, index=None, columns=col_depth)
            df_camera_formated = pd.concat([df_times, df_subjects,df_actions,df_pm_tmp,df_dc_tmp], axis=1)
        else:
            df_times =  pd.DataFrame(data=arr_timestamps, index=None, columns=['times'])
            df_subjects =  pd.DataFrame(data=arr_subjects, index=None, columns=['subjecta'])
            df_actions =  pd.DataFrame(data=arr_actions, index=None, columns=['actiona'])
            df_pm_tmp =  pd.DataFrame(data=arr_pm_tmp, index=None, columns=col_mat)
            df_dc_tmp =  pd.DataFrame(data=arr_dc_tmp, index=None, columns=col_depth)
            df_temp = pd.concat([df_times, df_subjects,df_actions,df_pm_tmp,df_dc_tmp], axis=1)
            df_camera_formated = pd.concat([df_camera_formated, df_temp], axis=0)
        
        break
    break

(4970,)
(4970,)
(4970,)
(4970, 512)
(4970, 192)
Wall time: 4.44 s


In [136]:
df_camera_formated

Unnamed: 0,times,subject,action,mat_1,mat_2,mat_3,mat_4,mat_5,mat_6,mat_7,...,depth_183,depth_184,depth_185,depth_186,depth_187,depth_188,depth_189,depth_190,depth_191,depth_192
0,2018-11-08 11:34:51.468000,1,1,20.0,3.0,2.0,0.0,0.0,0.0,72.0,...,0.9479,0.946,0.9429,0.6804,0.936,0.936,0.936,0.9329,0.926,0.9279
1,2018-11-08 11:34:51.535000,1,1,20.0,3.0,2.0,0.0,0.0,0.0,72.0,...,0.9479,0.946,0.9429,0.915,0.936,0.936,0.936,0.9352,0.926,0.9279
2,2018-11-08 11:34:51.602000,1,1,20.0,3.0,2.0,0.0,0.0,0.0,72.0,...,0.947,0.9452,0.9429,0.9165,0.936,0.936,0.936,0.9329,0.926,0.926
3,2018-11-08 11:34:51.669000,1,1,20.0,3.0,2.0,0.0,0.0,0.0,72.0,...,0.9479,0.9445,0.9429,0.915,0.936,0.936,0.9345,0.9329,0.926,0.926
4,2018-11-08 11:34:51.737000,1,1,20.0,3.0,2.0,0.0,0.0,0.0,72.0,...,0.946,0.946,0.9429,0.6804,0.936,0.936,0.936,0.9329,0.926,0.926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4965,2018-11-08 11:49:42.115000,1,5,0.0,0.0,352.0,0.0,433.0,0.0,0.0,...,0.9534,0.9534,0.9503,0.9474,0.9441,0.9433,0.9425,0.9433,0.9394,0.9386
4966,2018-11-08 11:49:42.183000,1,5,2.0,0.0,327.0,0.0,422.0,0.0,0.0,...,0.9534,0.9534,0.9503,0.9484,0.9441,0.9433,0.9433,0.9433,0.9402,0.9402
4967,2018-11-08 11:49:42.250000,1,5,2.0,0.0,299.0,0.0,397.0,0.0,0.0,...,0.9534,0.9534,0.9493,0.7108,0.9433,0.9433,0.9433,0.9433,0.9402,0.9402
4968,2018-11-08 11:49:42.318000,1,5,0.0,0.0,244.0,0.0,396.0,0.0,0.0,...,0.9534,0.9534,0.9503,0.9491,0.9448,0.9433,0.9433,0.9433,0.9394,0.9402


In [51]:
%%time
camera_data = np.empty(shape=(0,707))
for subject in df_pm['subject'].unique():
    df_depth_camera = df_dc[(df_pm['subject'] == subject)]
    df_pressure_mat = df_pm[(df_pm['subject'] == subject)]
    for action in df_pm['action'].unique():
        df_depth_camera_tmp = df_depth_camera[(df_depth_camera['action'] == action )]
        df_pressure_mat_tmp = df_pressure_mat[(df_pressure_mat['action'] == action)]
        
        if(len(df_depth_camera_tmp) < len(df_pressure_mat_tmp)):
            df_pressure_mat_tmp = df_pressure_mat_tmp.head(len(df_depth_camera_tmp))
        else:
            df_depth_camera_tmp = df_depth_camera_tmp.head(len(df_pressure_mat_tmp))
        
        arr = df_depth_camera_tmp[['times','action','subject']].to_numpy()
        #add the depthcamera columns 
        for i in range (1,193):
            arr = np.insert(arr,arr.shape[1], df_depth_camera_tmp[i].to_numpy(), axis=1)
    
        #add the pressure_mat columns 
        print(df_pressure_mat_tmp.head())
        for i in range (1,513):
            arr = np.insert(arr,arr.shape[1], df_pressure_mat_tmp[i].to_numpy(), axis=1)
        camera_data =  np.append(camera_data, arr, axis=0)
    break
break

  This is separate from the ipykernel package so we can avoid doing imports until


ValueError: cannot reindex from a duplicate axis

In [36]:
print(len(df_dc))
print(len(df_pm))

140694
202682


In [168]:
#create the columns array 
columns_cameras = ['times', 'action','subject']
for i in range (1,193):
    columns_cameras.append('depth_' + str(i))
for i in range (1,513):
    columns_cameras.append('mat_' + str(i))

In [169]:
pd_camera = pd.DataFrame(data=camera_data, index=None, columns=columns_cameras)

### Now we are going to combine the 15Hz data with the 100Hz data

The general strategy is to concatenate the 100Hz together in groups of 15Hz.  Then we will have 15Hz observations that have a series of 100Hz observations.

In [156]:
from datetime import datetime
pd_camera['times']
#pd_camera['times'] = pd.to_datetime(pd_camera['times'], infer_datetime_format=True)
pd_acc['times'] = pd.to_datetime(pd_acc['times'], infer_datetime_format=True)

In [175]:
for subject in pd_camera['subject'].unique():
    for action in pd_camera['action'].unique():
        print('here')
        pd_camera_temp = pd_camera[(pd_camera['subject'] == subject) & (pd_camera['action'] == action)]
        pd_acc_temp = pd_acc_temp[(pd_camera['subject'] == subject) & (pd_acc_temp['action'] == action)]
        print(len(pd_camera_temp), print(len(pd_acc_temp)))
        break
    break

In [None]:
)()