### Extract the time steps in the 5 seconds before the robot started driving to the participant, and in the 5 seconds before it started reaching out its arm, to capture the "start gesture" for OTP classification

In [None]:
# imports
import pandas as pd
import numpy as np
from itertools import product

In [None]:
# extract key time segments
def _window_indices(ends, buffer):
    """Return row positions of the windows [end - buffer, end) for every end in `ends`."""
    if len(ends) == 0:
        # an empty result must still be integer-typed, otherwise the
        # concatenated index turns into floats and df.iloc rejects it
        return np.empty(0, dtype=np.intp)
    # clamp at 0: a negative start would produce negative positions, which
    # iloc resolves to rows counted from the END of the recording
    starts = np.maximum(ends - buffer, 0)
    return np.concatenate([np.arange(start, end) for start, end in zip(starts, ends)])


def keytime(df, buffer=50):
    """Select the rows that precede key changes in the robot's base and arm status.

    Two kinds of events are located by comparing every row with its predecessor:

    * 'base status' changes and the new value is 'TO PARTICIPANT';
    * 'arm status' changes and the new value is 'REACHING', while the
      'status' of that row is not 'OPERATOR HANDOVER'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'base status', 'arm status' and 'status'.
    buffer : int, default 50
        Number of rows taken before each event. The surrounding comments call
        this "5s", which presumably means the data is sampled at 10 Hz
        (NOTE(review): confirm the sampling rate).

    Returns
    -------
    numpy.ndarray
        Sorted integer row positions suitable for ``df.iloc``. Positions that
        belong to both a base window and an arm window appear twice. Windows
        that would start before the first row are truncated at row 0.
    """
    # position i of the "current"/"previous" arrays compares row i + 1 with row i
    base = df['base status'].values
    base_change = base[1:] != base[:-1]
    arm = df['arm status'].values
    arm_change = arm[1:] != arm[:-1]

    # robot moving to participant (start the episode)
    to_participant = base[1:] == 'TO PARTICIPANT'
    # robot reaching out arm (start the object exchange)
    arm_reaching = arm[1:] == 'REACHING'
    # only look at object exchange at participant side
    p_handover = df['status'].values[1:] != 'OPERATOR HANDOVER'

    # event positions, expressed in the shifted (length - 1) comparison arrays
    ends_base = np.where(base_change & to_participant)[0]
    ends_arm = np.where(arm_change & arm_reaching & p_handover)[0]

    # each event at position i contributes rows [i - buffer, i)
    idx = np.concatenate((_window_indices(ends_base, buffer),
                          _window_indices(ends_arm, buffer)))
    idx.sort(kind='mergesort')

    return idx

In [None]:
# data type of columns (every column not listed here is cast to float)
int_cols = ['episode']
cat_cols = ['status', 'handover quality', 'handover type', 'arm status', 'base status', 'handover status']

# load raw data
participants = ['p1_2022-07-25', 'p2_2022-08-03', 'p3_2022-08-04', 'p4_2022-08-10', 'p5_2022-08-12', 
                'p6_2022-08-15', 'p7_2022-08-15', 'p8_2022-08-19', 'p9_2022-08-22', 'p10_2022-08-26', 
                'p11_2022-08-29', 'p12_2022-08-29', 'p13_2022-08-29', 'p14_2022-08-31', 'p15_2022-09-02', 
                'p16_2022-09-05', 'p17_2022-09-06', 'p18_2022-09-07', 'p19_2022-09-08', 'p20_2022-09-09']

# value used for missing entries in the manually entered columns
# NOTE: a disabled alternative in the original notebook filled
# 'handover type' with a separate 'NEITHER' category instead
missing_fill = {
    'handover quality': 'NEUTRAL',
    'handover type': 'ROBOT TO HUMAN',
}

for participant in participants:
    raw_data = 'data/ML/' + participant

    # read csv into dataframe
    df = pd.read_csv((raw_data + '.csv'), header=0)
    # specific data type of columns
    for column in df:
        if column in int_cols:
            df[column] = df[column].astype('int')
        elif column in cat_cols:
            df[column] = df[column].astype('category')
        else:
            df[column] = df[column].astype('float')

    # fill missing values in the manually entered handover quality and type columns
    for column, fill_value in missing_fill.items():
        # a categorical column only accepts values it already knows as
        # categories, so register the fill value first (adding a category
        # that already exists would raise, hence the membership check)
        if fill_value not in df[column].cat.categories:
            df[column] = df[column].cat.add_categories(fill_value)
        # assign the result back instead of calling fillna(inplace=True) on
        # the column selection: the chained in-place form is deprecated and
        # leaves df unchanged when pandas Copy-on-Write is active
        df[column] = df[column].fillna(fill_value)

    # extract key segments
    idx = keytime(df)
    # save as new data files
    df.iloc[idx].to_csv((raw_data + '_keytime.csv'), index=False)

    print(f'Finished processing participant: {participant}')


In [None]:
# create concatenated files for 5-fold cv and train-dev segmentation
def combcsv(segment, cv, mode):
    """Concatenate the per-participant CSV files of one data segment.

    For every participant in `segment`, 'data/ML/<participant>_<mode>.csv'
    is read; the frames are stacked in that order and written to
    'data/ML/combined_<mode>_<cv>.csv' without the index column.
    Progress messages are printed along the way. Returns None.
    """
    # destination of the merged dataset
    out_path = 'data/ML/combined_' + mode + '_' + cv + '.csv'

    # collect one dataframe per participant
    frames = []
    for participant in segment:
        source_path = 'data/ML/' + participant + '_' + mode + '.csv'
        frames.append(pd.read_csv(source_path, header=0))
        print('Read processed data:', participant)

    print('Concatenating...')
    merged = pd.concat(frames)

    print('Saving concatenated dataset...')
    merged.to_csv(out_path, index=False)

In [None]:
# training and development sets for parameter grid search
ML_trn = ['p2_2022-08-03', 'p3_2022-08-04', 'p4_2022-08-10', 
          'p6_2022-08-15', 'p7_2022-08-15', 'p8_2022-08-19', 'p10_2022-08-26', 
          'p11_2022-08-29', 'p12_2022-08-29', 'p14_2022-08-31', 'p15_2022-09-02', 
          'p16_2022-09-05', 'p18_2022-09-07', 'p19_2022-09-08', 'p20_2022-09-09']
ML_dev = ['p1_2022-07-25', 'p5_2022-08-12', 'p9_2022-08-22', 'p13_2022-08-29', 'p17_2022-09-06']

# for 5-fold cross-validation experiments: five consecutive slices of
# `participants`; each fold is tested on one slice and trained on the rest
cv_1 = participants[0:4]
cv_2 = participants[4:8]
cv_3 = participants[8:12]
cv_4 = participants[12:16]
cv_5 = participants[16:]
ML_trn_cv_1 = cv_2 + cv_3 + cv_4 + cv_5
ML_tst_cv_1 = cv_1
ML_trn_cv_2 = cv_1 + cv_3 + cv_4 + cv_5
ML_tst_cv_2 = cv_2
ML_trn_cv_3 = cv_1 + cv_2 + cv_4 + cv_5
ML_tst_cv_3 = cv_3
ML_trn_cv_4 = cv_1 + cv_2 + cv_3 + cv_5
ML_tst_cv_4 = cv_4
ML_trn_cv_5 = cv_1 + cv_2 + cv_3 + cv_4
ML_tst_cv_5 = cv_5
# not used by the visible code; kept in case other cells rely on them
list_comb_agg = []
list_comb_pad = []

mode_list = ['keytime']
# explicit name -> participant list mapping, so the segment name used in the
# output file name is tied to its data without looking names up in globals()
cv_segments = {
    'ML_trn': ML_trn, 'ML_dev': ML_dev,
    'ML_trn_cv_1': ML_trn_cv_1, 'ML_tst_cv_1': ML_tst_cv_1,
    'ML_trn_cv_2': ML_trn_cv_2, 'ML_tst_cv_2': ML_tst_cv_2,
    'ML_trn_cv_3': ML_trn_cv_3, 'ML_tst_cv_3': ML_tst_cv_3,
    'ML_trn_cv_4': ML_trn_cv_4, 'ML_tst_cv_4': ML_tst_cv_4,
    'ML_trn_cv_5': ML_trn_cv_5, 'ML_tst_cv_5': ML_tst_cv_5,
}
cv_list = list(cv_segments)

# create different data segments (one combined file per segment and mode)
for cv, mode in product(cv_list, mode_list):
    combcsv(cv_segments[cv], cv, mode)