In [1]:
import os
import pandas as pd
import numpy as np
from data_processing import (
    load_sensor_data, 
    load_meta_data, 
    read_sensor_data, 
    read_meta_data, 
    add_diagnosis_to_sensor, 
    filter_and_label_diagnosis,
    add_feature_to_sensor,
    convert_yob_to_age,
    impute_age_by_patient_avg,
    binary_gender_encode,
    impute_updrs3_by_knn,
    split_dataset,
    normalize_data
)
from data_visuals import plot_sensor_signals

# Notebook-wide pandas display settings: no cap on the number of columns
# shown, and wide frames are not wrapped across several output blocks.
pd.options.display.max_columns = None
pd.options.display.expand_frame_repr = False

In [2]:
# Root folder of the dataset; every loader below receives this path.
# Defaults to the original hard-coded location. Set the FOG_DATASET_ROOT
# environment variable to run the notebook on another machine without
# editing this cell.
root_directory = os.environ.get("FOG_DATASET_ROOT", "G:/My Drive/fog_dataset/")

### Loading Data

In [4]:
# Build a per-task CSV cache under <root_directory>/diagnosis: one
# sensor_<task>.csv and one meta_<task>.csv per task. A task whose two files
# already exist is skipped; if only one exists, just the missing one is built.
diagnosis_dir = os.path.join(root_directory, "diagnosis")

tasks = ["TUG", "2minwalk", "4x10mFastWithStop", "4x10mPrefWithoutStop", "4x10mSlowWithStop"]

if not os.path.exists(diagnosis_dir):
    os.makedirs(diagnosis_dir)
    print(f"Created directory: {diagnosis_dir}")

# (file-name prefix, label used in messages, loader) for the two tables of a task.
table_loaders = [
    ("sensor", "sensor data", load_sensor_data),
    ("meta", "metadata", load_meta_data),
]

# (task, label) pairs for which no CSV was written; reported after the loop so
# the closing message does not claim success when something was skipped/failed.
unsaved = []

for task_name in tasks:
    print(f"\nProcessing task: {task_name}...")

    csv_paths = {
        prefix: os.path.join(diagnosis_dir, f"{prefix}_{task_name}.csv")
        for prefix, _, _ in table_loaders
    }

    if all(os.path.exists(path) for path in csv_paths.values()):
        print(f"Files for {task_name} already exist. Skipping...")
        continue

    for prefix, label, loader in table_loaders:
        if os.path.exists(csv_paths[prefix]):
            # This table is already cached; only its sibling is missing.
            continue
        try:
            table = loader(root_directory, task_name)
            if not table.empty:
                table.to_csv(csv_paths[prefix], index=False)
            else:
                print(f"No {label} found for {task_name}. Skipping...")
                unsaved.append((task_name, label))
        except Exception as e:
            # Deliberate best effort: one failing task must not abort the others.
            print(f"Error loading {label} for {task_name}: {e}")
            unsaved.append((task_name, label))

if unsaved:
    print("\nFinished, but no file was saved for:")
    for unsaved_task, unsaved_label in unsaved:
        print(f"  - {unsaved_task}: {unsaved_label}")
else:
    print("\nAll tasks processed and saved!")


Created directory: G:/My Drive/fog_dataset/diagnosis

Processing task: TUG...


Loading Sensor Data for TUG: 100%|██████████| 122/122 [00:31<00:00,  3.86it/s]
Loading Metadata for TUG: 100%|██████████| 122/122 [00:05<00:00, 24.34it/s]



Processing task: 2minwalk...


Loading Sensor Data for 2minwalk: 100%|██████████| 122/122 [01:31<00:00,  1.34it/s]
Loading Metadata for 2minwalk: 100%|██████████| 122/122 [00:05<00:00, 23.60it/s]



Processing task: 4x10mFastWithStop...


Loading Sensor Data for 4x10mFastWithStop: 100%|██████████| 122/122 [00:39<00:00,  3.09it/s]
Loading Metadata for 4x10mFastWithStop: 100%|██████████| 122/122 [00:05<00:00, 22.40it/s]



Processing task: 4x10mPrefWithoutStop...


Loading Sensor Data for 4x10mPrefWithoutStop: 100%|██████████| 122/122 [00:45<00:00,  2.67it/s]
Loading Metadata for 4x10mPrefWithoutStop: 100%|██████████| 122/122 [00:05<00:00, 24.04it/s]



Processing task: 4x10mSlowWithStop...


Loading Sensor Data for 4x10mSlowWithStop: 100%|██████████| 122/122 [00:55<00:00,  2.18it/s]
Loading Metadata for 4x10mSlowWithStop: 100%|██████████| 122/122 [00:05<00:00, 24.36it/s]


All tasks processed and saved!





### Reading Data

In [3]:
def _read_task_tables(task):
    """Return (sensor, metadata) for `task` as read from `root_directory`.

    The sensor table is read first, then the metadata, so the progress
    messages appear in the same order for every task.
    """
    sensor = read_sensor_data(root_directory, task)
    meta = read_meta_data(root_directory, task)
    return sensor, meta


# One (sensor, meta) pair per walking task; the short variable suffixes
# (tug / 2minwalk / slow / pref / fast) are used by all later cells.
sensor_tug, meta_tug = _read_task_tables("TUG")
sensor_2minwalk, meta_2minwalk = _read_task_tables("2minwalk")
sensor_slow, meta_slow = _read_task_tables("4x10mSlowWithStop")
sensor_pref, meta_pref = _read_task_tables("4x10mPrefWithoutStop")
sensor_fast, meta_fast = _read_task_tables("4x10mFastWithStop")


Reading sensor data for TUG...
Reading metadata for TUG...
Reading sensor data for 2minwalk...
Reading metadata for 2minwalk...
Reading sensor data for 4x10mSlowWithStop...
Reading metadata for 4x10mSlowWithStop...
Reading sensor data for 4x10mPrefWithoutStop...
Reading metadata for 4x10mPrefWithoutStop...
Reading sensor data for 4x10mFastWithStop...
Reading metadata for 4x10mFastWithStop...


### Diagnosis Classification

#### Data Processing

In [4]:
# Attach the diagnosis to each task's sensor table, using that task's own
# metadata. The five calls are independent of each other; each one rebinds its
# sensor variable immediately so the previous frame can be released.
# NOTE: this cell rebinds its inputs, so it is not idempotent -- re-run the
# "Reading Data" cell before running it a second time.
sensor_tug = add_diagnosis_to_sensor(sensor_tug, meta_tug)
sensor_2minwalk = add_diagnosis_to_sensor(sensor_2minwalk, meta_2minwalk)
sensor_fast = add_diagnosis_to_sensor(sensor_fast, meta_fast)
sensor_pref = add_diagnosis_to_sensor(sensor_pref, meta_pref)
sensor_slow = add_diagnosis_to_sensor(sensor_slow, meta_slow)

In [5]:
# Diagnoses that stay in the dataset; rows with any other diagnosis are
# expected to be removed by filter_and_label_diagnosis.
diagnosis_to_keep = [
    "Parkinson's disease",
    "Control",
    "Parkinson's disease and dementia",
    "Parkinsonism unspecified",
    "Secondary parkinsonism: other"
]

# Positive class (label 1): every kept diagnosis except the controls.
label_1_diagnoses = [
    "Parkinson's disease",
    "Parkinson's disease and dementia",
    "Parkinsonism unspecified",
    "Secondary parkinsonism: other"
]

# Negative class (label 0).
label_0_diagnoses = ["Control"]


def _filter_and_binarize(sensor):
    """Apply filter_and_label_diagnosis to `sensor` with the three lists above.

    After this call the Diagnosis column is meant to hold 0 or 1.
    """
    return filter_and_label_diagnosis(
        sensor, diagnosis_to_keep, label_1_diagnoses, label_0_diagnoses
    )


sensor_tug = _filter_and_binarize(sensor_tug)
sensor_2minwalk = _filter_and_binarize(sensor_2minwalk)
sensor_slow = _filter_and_binarize(sensor_slow)
sensor_pref = _filter_and_binarize(sensor_pref)
sensor_fast = _filter_and_binarize(sensor_fast)

In [6]:
def _add_age(sensor, meta):
    """Give one task's sensor table an Age feature derived from year of birth.

    Steps, in order:
      1. add_feature_to_sensor(..., "YOB") brings the year of birth over from `meta`;
      2. convert_yob_to_age turns YOB into Age;
      3. impute_age_by_patient_avg fills missing Age values
         (based on unique patient averages);
      4. the now-redundant "YOB" column is dropped in place.
    """
    sensor = add_feature_to_sensor(sensor, meta, "YOB")
    sensor = convert_yob_to_age(sensor)
    sensor = impute_age_by_patient_avg(sensor)
    sensor.drop(columns=["YOB"], inplace=True)
    return sensor


# Each task is processed start-to-finish before the next one begins.
sensor_tug = _add_age(sensor_tug, meta_tug)
sensor_2minwalk = _add_age(sensor_2minwalk, meta_2minwalk)
sensor_slow = _add_age(sensor_slow, meta_slow)
sensor_pref = _add_age(sensor_pref, meta_pref)
sensor_fast = _add_age(sensor_fast, meta_fast)

In [7]:
def _add_gender(sensor, meta):
    """Copy "Gender" from `meta` onto `sensor`, then binary-encode it."""
    with_gender = add_feature_to_sensor(sensor, meta, "Gender")
    return binary_gender_encode(with_gender)


# Gender feature for every task, one task at a time.
sensor_tug = _add_gender(sensor_tug, meta_tug)
sensor_2minwalk = _add_gender(sensor_2minwalk, meta_2minwalk)
sensor_slow = _add_gender(sensor_slow, meta_slow)
sensor_pref = _add_gender(sensor_pref, meta_pref)
sensor_fast = _add_gender(sensor_fast, meta_fast)


In [8]:
def _add_updrs3(sensor, meta):
    """Add the "MSDS-UPDRS part 3" metadata feature to `sensor` as "UPDRS3".

    The column is renamed to the shorter "UPDRS3" before
    impute_updrs3_by_knn fills in its missing values.
    """
    sensor = add_feature_to_sensor(sensor, meta, "MSDS-UPDRS part 3")
    sensor = sensor.rename(columns={"MSDS-UPDRS part 3": "UPDRS3"})
    return impute_updrs3_by_knn(sensor)


# UPDRS3 feature (added, renamed, imputed) for every task.
sensor_tug = _add_updrs3(sensor_tug, meta_tug)
sensor_2minwalk = _add_updrs3(sensor_2minwalk, meta_2minwalk)
sensor_slow = _add_updrs3(sensor_slow, meta_slow)
sensor_pref = _add_updrs3(sensor_pref, meta_pref)
sensor_fast = _add_updrs3(sensor_fast, meta_fast)


#### Train Test Split

In [9]:
# Short task label -> fully pre-processed sensor table for that task.
df_dict = {
    "TUG": sensor_tug,
    "2minwalk": sensor_2minwalk,
    "Slow": sensor_slow,
    "Pref": sensor_pref,
    "Fast": sensor_fast,
}

# Base path handed to split_dataset for each task. Built with os.path.join
# (as the caching cell does for the "diagnosis" folder) so a root_directory
# that already ends in a separator does not produce a doubled "//".
save_paths = {
    task: os.path.join(root_directory, "diagnosis", f"df_{task}")
    for task in df_dict
}

split_results = {}

# Perform train-validation-test split.
# NOTE(review): the recorded output shows split_dataset printing
# "Found existing split files ... Loading data...", i.e. it reuses splits
# already stored at save_path. Presumably those files must be removed for
# changes in the pre-processing cells above to take effect -- confirm.
for task_name, df in df_dict.items():
    split_results[task_name] = split_dataset(
        df, dataset_name=task_name, save_path=save_paths[task_name]
    )



Found existing split files for TUG. Loading data...
Dataset Split Summary for TUG:
  - Total Patients: 112 (Original)
  - Train Patients: 71
  - Validation Patients: 18
  - Test Patients: 23

Found existing split files for 2minwalk. Loading data...
Dataset Split Summary for 2minwalk:
  - Total Patients: 115 (Original)
  - Train Patients: 73
  - Validation Patients: 19
  - Test Patients: 23

Found existing split files for Slow. Loading data...
Dataset Split Summary for Slow:
  - Total Patients: 110 (Original)
  - Train Patients: 70
  - Validation Patients: 18
  - Test Patients: 22

Found existing split files for Pref. Loading data...
Dataset Split Summary for Pref:
  - Total Patients: 109 (Original)
  - Train Patients: 69
  - Validation Patients: 18
  - Test Patients: 22

Found existing split files for Fast. Loading data...
Dataset Split Summary for Fast:
  - Total Patients: 110 (Original)
  - Train Patients: 70
  - Validation Patients: 18
  - Test Patients: 22


#### Normalization

In [10]:
# Columns handed to normalize_data: the filtered accelerometer/gyroscope
# X/Y/Z channels with the "L_" prefix, followed by those with the "R_" prefix.
sensor_columns = [
    'L_AccX_filt', 'L_AccY_filt', 'L_AccZ_filt',
    'L_GyrX_filt', 'L_GyrY_filt', 'L_GyrZ_filt',
    'R_AccX_filt', 'R_AccY_filt', 'R_AccZ_filt',
    'R_GyrX_filt', 'R_GyrY_filt', 'R_GyrZ_filt'
]

# Normalize every split of every task, replacing the stored frame with the
# normalized one.
# NOTE(review): each split is passed to normalize_data on its own. Whether
# val/test end up scaled with train statistics depends on what normalize_data
# does internally -- confirm there is no leakage or scale mismatch.
for task_name, splits in split_results.items():
    for split_name in ("train", "val", "test"):
        split_results[task_name][split_name] = normalize_data(splits[split_name], sensor_columns)
