In [19]:
import pandas as pd
import os
from datetime import datetime
import utils.data_utils as data_utils
from collections import defaultdict

# Action subset that extend_df averages over when called with long_term=True.
LONG_TERM_ACTIONS = [
    'walking',
    'eating',
    'smoking',
    'discussion',
]
# Whatever data_utils.define_actions('all') returns, ordered by name length
# (shortest first; ties keep their original relative order).
ACTIONS = sorted(data_utils.define_actions('all'), key=len)

In [24]:
def csv_iter(directory='./checkpoint/test', prefix='short_term', min_day=20):
    """Yield the names of result CSVs in ``directory`` that pass the run filter.

    A file name is yielded when all of the following hold:

    * it starts with ``prefix``;
    * its last dot-separated component is ``csv``;
    * the 19 characters just before the 4-character extension parse as a
      ``dd-mm-YYYY-HH:MM:SS`` timestamp;
    * that timestamp's day-of-month is strictly greater than ``min_day``.

    Args:
        directory: Folder to scan (defaults to the previously hard-coded path).
        prefix: Required file-name prefix.
        min_day: Exclusive lower bound on the timestamp's day-of-month.

    Yields:
        Bare file names (not joined with ``directory``), in ``os.listdir`` order.
    """
    for fname in os.listdir(directory):
        if not fname.startswith(prefix):
            continue
        if fname.split('.')[-1] != 'csv':
            continue
        try:
            stamp = datetime.strptime(fname[-23:-4], "%d-%m-%Y-%H:%M:%S")
        except ValueError:
            # No well-formed timestamp before the extension: not a run file.
            continue
        # The yield stays outside the try block so that GeneratorExit (raised
        # at the yield when the generator is closed early) is never swallowed.
        if stamp.day > min_day:
            yield fname

def extend_df(df, long_term=True, actions=None, time_lens=None):
    """Add per-horizon averages over actions to ``df`` and return their names.

    For every horizon ``t`` two columns are written into ``df`` in place:

    * ``'3d<t>'`` – row-wise mean of ``'<action>3d<t>'`` over all actions;
    * ``'usingfulltestset_3d<t>'`` – row-wise mean of
      ``'usingfulltestset_<action>3d<t>'`` over all actions.

    Args:
        df: Frame holding the per-action columns; it is modified in place.
        long_term: Selects the defaults. True uses ``LONG_TERM_ACTIONS`` with
            horizons 560 and 1000; False uses ``ACTIONS`` with horizons
            80, 160, 320 and 400.
        actions: Optional explicit list of action names overriding the default.
        time_lens: Optional explicit list of horizons overriding the default.

    Returns:
        The names of the averaged columns, ordered horizon by horizon with
        the plain column before its ``usingfulltestset_`` counterpart.

    Raises:
        KeyError: If ``df`` lacks one of the expected per-action columns.
    """
    if actions is None:
        actions = LONG_TERM_ACTIONS if long_term else ACTIONS
    if time_lens is None:
        time_lens = [560, 1000] if long_term else [80, 160, 320, 400]

    added = []
    for time_len in time_lens:
        tag = '3d' + str(time_len)
        for prefix in ('', 'usingfulltestset_'):
            sources = [prefix + action_name + tag for action_name in actions]
            if not sources:
                # No actions means nothing to average; add no column.
                continue
            df[prefix + tag] = df[sources].mean(axis=1)
            added.append(prefix + tag)
    return added

def get_summary_df(merged):
    """Summarise ``<metric>_run<N>`` columns as per-metric mean and std.

    Columns are grouped by the text before their trailing ``_run<N>`` suffix,
    where ``N`` may have any number of digits. Columns without such a suffix
    are ignored. ``merged`` itself is left untouched, so calling this function
    repeatedly on the same frame always gives the same answer.

    Args:
        merged: Frame whose columns are named ``<metric>_run<N>``.

    Returns:
        A new DataFrame sharing ``merged``'s index, with ``<metric>_mean`` and
        ``<metric>_std`` columns for every metric, in order of each metric's
        first appearance. The std is pandas' default sample standard deviation.
    """
    groups = {}
    for name in merged.columns:
        base, sep, run_idx = str(name).rpartition('_run')
        if not sep or not run_idx.isdigit():
            continue
        groups.setdefault(base, []).append(name)

    summary = {}
    for base, members in groups.items():
        runs = merged[members]
        summary[base + '_mean'] = runs.mean(axis=1)
        summary[base + '_std'] = runs.std(axis=1)
    return pd.DataFrame(summary, index=merged.index)

In [28]:
# For every CSV selected by csv_iter(), compute the short-term averaged columns
# and keep (a) all rows and (b) only the row with the smallest 'v_3d' value.
# Columns get a '_run<i>' suffix so the runs can sit side by side.
df_lst = []
mini_df_lst = []
for i, f in enumerate(csv_iter()):
    print(i, f)
    df = pd.read_csv(os.path.join('checkpoint/test', f))
    # Ascending sort: the first row is the one with the lowest 'v_3d'
    # (presumably the validation error -- TODO confirm against the training script).
    df = df.sort_values(by='v_3d')
    # extend_df writes the averaged columns into df, so call it once and reuse.
    run_cols = df[extend_df(df, long_term=False)].add_suffix('_run' + str(i))
    df_lst.append(run_cols)
    mini_df_lst.append(run_cols.head(1).reset_index(drop=True))

# One row, with one block of columns per run.
merged = pd.concat(mini_df_lst, axis=1)

0 short_termmain_3d_3D_in10_out10_dct_n_30_27-06-2020-05:14:06.csv
1 short_termmain_3d_3D_in10_out10_dct_n_30_28-06-2020-15:51:56.csv
2 short_termmain_3d_3D_in10_out10_dct_n_30_26-06-2020-17:51:42.csv
3 short_termmain_3d_3D_in10_out10_dct_n_30_28-06-2020-04:30:52.csv
4 short_termmain_3d_3D_in10_out10_dct_n_30_27-06-2020-17:10:16.csv


In [29]:
# Mean/std across runs for every averaged column.
tmp = get_summary_df(merged)
# Display only the columns whose name does not begin with 'using'.
tmp.loc[:, [name for name in tmp.columns if not name.startswith('using')]]

Unnamed: 0,3d320_mean,3d320_std,3d400_mean,3d400_std,3d80_mean,3d80_std,3d160_mean,3d160_std
0,51.481868,0.420609,62.144715,0.514755,11.972224,0.119307,25.211424,0.224522


In [18]:
# Show the one-row frame stored for run 0 (columns carry the '_run0' suffix).
# NOTE(review): the recorded output lists 3d560/3d1000 columns, which the
# long_term=False loading loop does not produce -- it appears to come from an
# earlier long_term=True run; re-execute after a kernel restart to confirm.
mini_df_lst[0]

Unnamed: 0,3d560_run0,usingfulltestset_3d560_run0,3d1000_run0,usingfulltestset_3d1000_run0
0,48.846711,59.58333,67.447821,80.191269


In [19]:
# Show the one-row frame stored for run 1 (columns carry the '_run1' suffix).
mini_df_lst[1]

Unnamed: 0,3d560_run1,usingfulltestset_3d560_run1,3d1000_run1,usingfulltestset_3d1000_run1
0,49.677015,59.760531,69.034007,80.295964


In [20]:
# Show the one-row frame stored for run 2 (columns carry the '_run2' suffix).
mini_df_lst[2]

Unnamed: 0,3d560_run2,usingfulltestset_3d560_run2,3d1000_run2,usingfulltestset_3d1000_run2
0,49.96481,59.909003,67.859545,79.994567


In [21]:
# Show the one-row frame stored for run 3 (columns carry the '_run3' suffix).
mini_df_lst[3]

Unnamed: 0,3d560_run3,usingfulltestset_3d560_run3,3d1000_run3,usingfulltestset_3d1000_run3
0,51.103317,60.097925,71.083157,80.568536


In [22]:
# Show the one-row frame stored for run 4 (columns carry the '_run4' suffix).
mini_df_lst[4]

Unnamed: 0,3d560_run4,usingfulltestset_3d560_run4,3d1000_run4,usingfulltestset_3d1000_run4
0,48.528764,60.29383,67.361385,80.856485
