In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to the project's `src` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from src.extraction import write_channel_csvs, parse_folder_to_channel_dfs

In [3]:
import os

# --- Dataset layout: every path below is derived from the data root ----------
ROOT_DATA_FOLDER = "./data"
ROOT_FOLDER_OLDER_ADULTS = os.path.join(ROOT_DATA_FOLDER, "older_adults")

# Raw inputs: self-report files and the physiological-signal recordings.
ROOT_ORIGINAL = os.path.join(ROOT_FOLDER_OLDER_ADULTS, "original_files")
SELF_REPORT_FOLDER = os.path.join(ROOT_ORIGINAL, "self_report")
# This one is a template, not a concrete path: fill it in with
# `EMPATICA_EMBRACE_PLUS_FOLDER.format(subject_id=...)`.
EMPATICA_EMBRACE_PLUS_FOLDER = os.path.join(
    ROOT_ORIGINAL,
    "physiological_signal/00{subject_id}-3YK9K1J2D2/raw_data/v6",
)

# Pipeline outputs: per-channel extraction first, then the processed tables.
EXTRACTION_OUTPUT_DIR = os.path.join(
    ROOT_FOLDER_OLDER_ADULTS, "extraction_from_original_files"
)
PROCESSED_DATA_OUTPUT_DIR = os.path.join(
    ROOT_FOLDER_OLDER_ADULTS, "processed_data"
)

In [4]:
# Extract every subject's raw recording folder into per-channel CSV files.
# Subjects are numbered 1..7; output folders and file prefixes use "00<id>".
for subject_id in range(1, 8):
    subject_tag = f'00{subject_id}'
    raw_folder = EMPATICA_EMBRACE_PLUS_FOLDER.format(subject_id=subject_id)
    channel_dfs = parse_folder_to_channel_dfs(raw_folder)
    write_channel_csvs(
        channel_dfs,
        out_dir=os.path.join(EXTRACTION_OUTPUT_DIR, subject_tag),
        prefix=subject_tag,
    )

[WARN] No data for accelerometer
[OK] wrote accelerometer: ./data/older_adults/extraction_from_original_files/001/001_accelerometer.csv (0 rows)
[OK] wrote eda: ./data/older_adults/extraction_from_original_files/001/001_eda.csv (7328 rows)
[OK] wrote temperature: ./data/older_adults/extraction_from_original_files/001/001_temperature.csv (1832 rows)
[OK] wrote bvp: ./data/older_adults/extraction_from_original_files/001/001_bvp.csv (117120 rows)
[OK] wrote systolicPeaks: ./data/older_adults/extraction_from_original_files/001/001_systolicPeaks.csv (2119 rows)
[WARN] No data for accelerometer
[OK] wrote accelerometer: ./data/older_adults/extraction_from_original_files/002/002_accelerometer.csv (0 rows)
[OK] wrote eda: ./data/older_adults/extraction_from_original_files/002/002_eda.csv (7052 rows)
[OK] wrote temperature: ./data/older_adults/extraction_from_original_files/002/002_temperature.csv (1763 rows)
[OK] wrote bvp: ./data/older_adults/extraction_from_original_files/002/002_bvp.csv (11

In [5]:
from src.preprocessing import MergeConfig, process_all_subjects, SCIPY_OK

# Merge settings. Per the original author's note, every value below equals the
# `MergeConfig` default and is spelled out only for readability; for a complete
# list of default configurations, see the definition of class `MergeConfig`.
cfg = MergeConfig(
    gap_threshold_s=3.0,
    map_method="snap",
    map_snap_kind="one_to_one",  # "one_to_one" or "per_grid"
    map_interp_kind="linear",
    # band-pass filter frequencies (Hz) for the inferred heart rate (HR)
    hr_bp_low_hz=0,
    hr_bp_high_hz=4,
    # interpolation parameters of HR
    hr_target="1hz",
    hr_interp_kind="cubic",
)

# Warn up front when the selected options rely on SciPy but it is unavailable.
needs_scipy = (
    cfg.map_method == "interp"
    or cfg.hr_interp_kind in ("quadratic", "cubic")
)
if needs_scipy and not SCIPY_OK:
    print("[WARN] SciPy not available: quadratic/cubic will fall back to linear; bandpass disabled.")

# Process the extracted per-channel CSVs; merged tables go to "<processed>/combined".
combined_dir = os.path.join(PROCESSED_DATA_OUTPUT_DIR, "combined")
process_all_subjects(
    root=EXTRACTION_OUTPUT_DIR,
    combined_out_dir=combined_dir,
    cfg=cfg,
)

[SUBJECT 001] -> ./data/older_adults/processed_data/combined/001_merged_64hz.csv
[SUBJECT 002] -> ./data/older_adults/processed_data/combined/002_merged_64hz.csv
[SUBJECT 003] -> ./data/older_adults/processed_data/combined/003_merged_64hz.csv
[SUBJECT 004] -> ./data/older_adults/processed_data/combined/004_merged_64hz.csv
[SUBJECT 005] -> ./data/older_adults/processed_data/combined/005_merged_64hz.csv
[SUBJECT 006] -> ./data/older_adults/processed_data/combined/006_merged_64hz.csv
[SUBJECT 007] -> ./data/older_adults/processed_data/combined/007_merged_64hz.csv


In [6]:
from src.merge import batch_join

# Join each subject's merged signal table with that subject's self-report file.
# Inputs come from "<processed>/combined" and the self-report folder; results
# are written to "<processed>/with_self_report".
join_kwargs = {
    "merged_dir": os.path.join(PROCESSED_DATA_OUTPUT_DIR, "combined"),
    "self_report_dir": SELF_REPORT_FOLDER,
    "out_dir": os.path.join(PROCESSED_DATA_OUTPUT_DIR, "with_self_report"),
    "time_window_csv": os.path.join(ROOT_FOLDER_OLDER_ADULTS, 'experiment_time.csv'),
    "merged_glob": "*_merged_*hz.csv",
    # Taken from the merge config built in the preprocessing cell.
    "max_snap_s": cfg.pain_max_snap_s,
}
batch_join(**join_kwargs)

[OK] 001: ./data/older_adults/processed_data/combined/001_merged_64hz.csv + ./data/older_adults/original_files/self_report/001.csv -> ./data/older_adults/processed_data/with_self_report/001_merged_64hz_with_self_report.csv
[OK] 002: ./data/older_adults/processed_data/combined/002_merged_64hz.csv + ./data/older_adults/original_files/self_report/002.csv -> ./data/older_adults/processed_data/with_self_report/002_merged_64hz_with_self_report.csv
[OK] 003: ./data/older_adults/processed_data/combined/003_merged_64hz.csv + ./data/older_adults/original_files/self_report/003.csv -> ./data/older_adults/processed_data/with_self_report/003_merged_64hz_with_self_report.csv
[OK] 004: ./data/older_adults/processed_data/combined/004_merged_64hz.csv + ./data/older_adults/original_files/self_report/004.csv -> ./data/older_adults/processed_data/with_self_report/004_merged_64hz_with_self_report.csv
[OK] 005: ./data/older_adults/processed_data/combined/005_merged_64hz.csv + ./data/older_adults/original_fil

In [10]:
import os
import pandas as pd
from pathlib import Path

# Combine every per-subject "with self-report" CSV into one DataFrame and save
# it as a single .feather file. Each row is tagged with the subject ID taken
# from the leading part of the file name (e.g. "001_merged_..." -> "001").
directory_path = Path(os.path.join(PROCESSED_DATA_OUTPUT_DIR, "with_self_report"))

subject_frames = []
for file_path in sorted(directory_path.glob('*.csv')):
    if not file_path.is_file():
        continue
    print(file_path)
    subject_df = pd.read_csv(file_path)
    subject_df['subject'] = file_path.name.split('_')[0]
    subject_frames.append(subject_df)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously loaded rows on every iteration and concatenates onto an empty frame.
# Fall back to an empty DataFrame when no CSV was found, as the original did.
df = pd.concat(subject_frames, ignore_index=True) if subject_frames else pd.DataFrame()

# save all subjects' data to a single .feather file
df.to_feather(os.path.join(ROOT_FOLDER_OLDER_ADULTS, 'older_adults.feather'))

data/older_adults/processed_data/with_self_report/001_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/002_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/003_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/004_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/005_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/006_merged_64hz_with_self_report.csv
data/older_adults/processed_data/with_self_report/007_merged_64hz_with_self_report.csv
