# Merge Behavioral Ground Truth with Accelerometer Features

## Setup

In [26]:
import pandas as pd
import numpy as np
import os

## Merge

**1. Build the list of `<id>_<observation>` keys from the observation log**

In [21]:
# Load the observation log and build one "<id>_<do>" key per logged session.
log = pd.read_csv("do_log_final.csv")

# Cast both columns to text so they can be concatenated with an underscore.
participant_ids = log["id"].astype(str)
session_numbers = log["do"].astype(str)
id_obs = participant_ids + "_" + session_numbers

id_obs

0     102_1
1     102_2
2     116_1
3     116_2
4     117_1
5     117_2
6     122_1
7     122_2
8     124_1
9     124_2
10    126_1
11    126_2
12    127_1
13    127_2
14    128_1
15    128_2
16    129_1
17    129_2
18    130_1
19    130_2
20    131_1
21    131_2
22    132_1
23    132_2
24    133_1
25    133_2
26    134_1
27    134_2
28    135_1
29    135_2
30    136_1
31    136_2
32    138_1
33    138_2
34    139_1
35    139_2
36    140_1
37    140_2
38    141_1
39    141_2
40    143_1
41    143_2
42    144_1
43    144_2
44    150_1
45    150_2
46    154_1
47    154_2
dtype: object

**2. Get ground truth**

In [10]:
# Second-by-second ground truth for all participants/observations.
# low_memory=False makes pandas read the file in one pass rather than in
# chunks, so each column gets a single inferred dtype.
# The displayed output shows an "Unnamed: 0" column coming from the CSV;
# it is left in place here.
gt_path = "merged_groundtruth_secbysec.csv"
gt = pd.read_csv(gt_path, low_memory=False)

gt

Unnamed: 0,id,observation,date,date_time,activity_type,broad_activity_type,work_type,posture,sedentary_not,walking_not,activity_intensity,quality,step
0,102,1,2019-07-24,2019-07-24 08:20:19,WRK- general,work_education,SP- Education and Health Services,stand,not_sedentary,not_walking,light,Codable,0
1,102,1,2019-07-24,2019-07-24 08:20:20,WRK- general,work_education,SP- Education and Health Services,stand,not_sedentary,not_walking,light,Codable,0
2,102,1,2019-07-24,2019-07-24 08:20:21,WRK- general,work_education,SP- Education and Health Services,stand,not_sedentary,not_walking,light,Codable,0
3,102,1,2019-07-24,2019-07-24 08:20:22,WRK- general,work_education,SP- Education and Health Services,stand,not_sedentary,not_walking,light,Codable,0
4,102,1,2019-07-24,2019-07-24 08:20:23,WRK- general,work_education,SP- Education and Health Services,stand,not_sedentary,not_walking,light,Codable,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
414579,154,2,2020-02-23,2020-02-23 15:55:56,"EDU- taking class, research, homework",work_education,,sitting,sedentary,not_walking,sedentary,Codable,0
414580,154,2,2020-02-23,2020-02-23 15:55:57,"EDU- taking class, research, homework",work_education,,sitting,sedentary,not_walking,sedentary,Codable,0
414581,154,2,2020-02-23,2020-02-23 15:55:58,"EDU- taking class, research, homework",work_education,,sitting,sedentary,not_walking,sedentary,Codable,0
414582,154,2,2020-02-23,2020-02-23 15:55:59,"EDU- taking class, research, homework",work_education,,sitting,sedentary,not_walking,sedentary,Codable,0


**3. Create merged datasets destination directory**

In [None]:
# Destination directory for the per-session classifier training sets.
folder_path = "classifier_trainsets"

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(folder_path, exist_ok=True)

**4. Merge and output**

NOTE: The `step` ground-truth column is not included in the output; these training sets are intended for the classifier only. Additional code to output ground truth with steps (for peak detection) will be added later.

In [31]:
# Merge each session's accelerometer samples with the second-by-second ground
# truth and write one classifier training CSV per "<id>_<observation>" key in
# `id_obs`.
merged_final = pd.DataFrame()  # not populated in this cell; kept so the name stays defined

# Columns written to each training file, in output order ("step" is not among them).
output_columns = [
    "id", "observation", "time", "date_time", "date",
    "activity_type", "broad_activity_type", "work_type", "posture",
    "sedentary_not", "walking_not", "activity_intensity", "quality",
    "x", "y", "z",
]

for obs in id_obs:
    act24_obs_path = "ACT24_" + obs + ".csv"

    # Only the read is guarded: a missing accelerometer file is reported and
    # skipped, while a failure to write the output is no longer misreported
    # as a missing input file.
    try:
        act24_obs = pd.read_csv("process_sessions_output/" + act24_obs_path)
    except FileNotFoundError:
        print("File not found: " + act24_obs_path)
        continue

    # `obs` is "<id>_<observation>". Split on the last underscore and parse the
    # whole observation token, so observation numbers with more than one digit
    # are not truncated to their last character.
    participant_id, observation = obs.rsplit("_", 1)
    act24_obs["id"] = int(participant_id)
    act24_obs["observation"] = int(observation)

    # Keep everything before the first "." in the accelerometer timestamp so it
    # can be matched against the ground truth's whole-second `date_time`.
    # NOTE(review): assumes `time` is a string with fractional seconds after a
    # "." (e.g. "YYYY-MM-DD HH:MM:SS.fff") -- confirm against the input files.
    act24_obs["date_time"] = act24_obs["time"].str.split(".").str[0]

    # Default (inner) merge: only samples with a matching ground-truth second are kept.
    merged_obs = pd.merge(act24_obs, gt, on=["id", "observation", "date_time"])[output_columns]

    out_path = "classifier_trainsets/ACT24_" + obs + "_CTRAIN.csv"
    merged_obs.to_csv(out_path, index=False)
    print("File output:    " + out_path)

File not found: ACT24_102_1.csv
File not found: ACT24_102_2.csv
File output:    classifier_trainsets/ACT24_116_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_116_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_117_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_117_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_122_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_122_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_124_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_124_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_126_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_126_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_127_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_127_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_128_1_CTRAIN.csv
File output:    classifier_trainsets/ACT24_128_2_CTRAIN.csv
File output:    classifier_trainsets/ACT24_129_1_CTRAIN.csv
File output:    classifier_trainsets