In [57]:
# ADHD Classification Based on HRV
# Purpose: Determine whether heart rate variability can predict diagnosis of ADHD
# Author: Alexander Maksiaev

In [58]:
# Housekeeping

import pandas as pd
import numpy as np
import sklearn 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import glob
import os
import csv 
from datetime import datetime
from sktime.classification.feature_based import RandomIntervalClassifier
from sktime.transformations.panel.padder import PaddingTransformer
import sktime.datasets


# Directories

# Raw string: the Windows-style path contains backslash sequences such as "\m", "\D"
# and "\h", which are invalid escapes in a normal string literal (SyntaxWarning on
# recent Python versions). The r-prefix keeps the exact same runtime value.
adhd_dir_raw = r"C:/Users\maksi\Documents\Statistics\Projects\Movement_Mental_Health\hyperaktiv\hyperaktiv_with_controls\hyperaktiv_with_controls"
adhd_dir = adhd_dir_raw.replace("\\", "/")  # Normalise every separator to "/" instead of editing the path by hand
activity_dir = os.path.join(adhd_dir, "activity_data")  # per-participant activity CSV files
hrv_dir = os.path.join(adhd_dir, "hrv_data")
output_dir = os.path.join(adhd_dir, "pt_features")
# controls_dir = os.path.join(adhd_dir, "hyperaktiv_with_controls/hyperaktiv_with_controls/")
# activity_dir_controls = os.path.join(controls_dir, "activity_data/")

# The relative read below resolves against the data set root.
os.chdir(adhd_dir)

# Participant/patient info -- in particular, ADHD diagnosis status.
# IDs are turned into zero-padded strings (1 -> "01") so they can be compared with the
# ID suffix of the activity file names (e.g. patient_activity_01.csv).
patient_info = pd.read_csv("patient_info.csv", delimiter=";").assign(
    ID=lambda info: info["ID"].astype("string").str.zfill(2)
)

patient_info

# os.chdir(hrv_dir)
# hrv_data = pd.read_csv("patient_hr_1.csv", delimiter=";")
# hrv_data

Unnamed: 0,ID,SEX,AGE,ACC,ACC_TIME,ACC_DAYS,HRV,HRV_TIME,HRV_HOURS,CPT_II,...,HADS_D,MED,MED_Antidepr,MED_Moodstab,MED_Antipsych,MED_Anxiety_Benzo,MED_Sleep,MED_Analgesics_Opioids,MED_Stimulants,filter_$
0,01,0,3,1,16:00:00,6,1,11:00:00,21.0,0,...,2.0,1.0,1.0,,,,,,,1.0
1,02,0,4,1,10:54:00,6.8,0,,,1,...,7.0,0.0,,,,,,,,1.0
2,03,1,2,1,15:28:00,7.2,1,15:25:00,21.0,1,...,0.0,0.0,,,,,,,,1.0
3,04,1,3,0,,,1,16:55:00,22.0,1,...,6.0,1.0,1.0,,,1.0,,1.0,,0.0
4,05,1,1,1,14:24:00,5.9,1,16:00:00,12.0,1,...,5.0,0.0,,,,,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,236,1,2,1,9:30:00,133,0,,,0,...,,,,,,,,,,
130,237,0,1,1,15:00:00,14,0,,,0,...,,,,,,,,,,
131,238,0,4,1,9:00:00,146,0,,,0,...,,,,,,,,,,
132,239,0,4,1,9:00:00,143,0,,,0,...,,,,,,,,,,


In [59]:
# First, activity data feature extraction using sktime 

# Authors' function to read in activity file
# Hicks et al. 2021
# HYPERAKTIV: An Activity Dataset from Patients with Attention-Deficit/Hyperactivity Disorder (ADHD)
def read_activity_file(filepath, patient_id):
    """Read one participant's activity recording into a DataFrame.

    The file is parsed as ';'-delimited text. The first row is treated as a header and
    skipped. For every following row, the first field is parsed as a
    ``%m-%d-%Y %H:%M`` timestamp and the second field supplies the activity count
    (only the part before the first space is converted to ``int``).

    Parameters
    ----------
    filepath : str or path-like
        Location of the activity CSV file.
    patient_id : object
        Identifier written to every row of the ``ID`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``TIME`` (POSIX timestamp of the naive datetime, i.e. interpreted in
        the local timezone), ``ACC`` (int) and ``ID``. A file with no data rows
        yields an empty frame with the same columns.

    Raises
    ------
    ValueError
        If a timestamp or activity value cannot be parsed.
    """
    rows = []
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects.
    with open(filepath, newline="") as f:
        csv_reader = csv.reader(f, delimiter=";")
        next(csv_reader, None)  # skip the header; tolerate a completely empty file
        for line in csv_reader:
            # Blank rows (e.g. a trailing empty line) carry no measurement.
            if not any(field.strip() for field in line):
                continue
            timestamp = datetime.strptime(line[0], "%m-%d-%Y %H:%M").timestamp()
            activity = int(line[1].split(" ")[0])
            rows.append([timestamp, activity])
    data = pd.DataFrame(rows, columns=["TIME", "ACC"])
    data["ID"] = patient_id
    return data

# Creating data frames for classification
all_participants = []  # List of dataframes (each dataframe is a different participant)
pt_ids = []  # Participant IDs from the file names, in the same order as all_participants

# glob returns matches in arbitrary order; sorting makes the participant order (and with
# it any seeded split downstream) identical on every run.
for filepath in sorted(glob.glob(os.path.join(activity_dir, "*.csv"))):
    print("Reading %s" % filepath)
    patient_id_raw = os.path.splitext(os.path.basename(filepath))[0]
    patient_id = patient_id_raw.split("_")[-1]  # ID is the part after the last underscore

    data = read_activity_file(filepath, patient_id)
    pt_ids.append(patient_id)  # Used below to pick the matching patient_info rows

    all_participants.append(data)

# Feature frames without the constant "categorical" ID column
no_id_pts = [participant[["TIME", "ACC"]] for participant in all_participants]

# Keep only participants that have an activity file, with rows in the SAME ORDER as
# pt_ids / no_id_pts. Filtering with .isin() would keep the order of patient_info.csv,
# so labels taken from it by position could belong to a different participant than the
# recording at the same position.
info_by_id = patient_info.set_index("ID")
missing_ids = [pid for pid in pt_ids if pid not in info_by_id.index]
if missing_ids:
    raise ValueError(
        "No patient_info row for activity file ID(s): %s" % ", ".join(missing_ids)
    )
pt_info_acc = info_by_id.loc[pt_ids].reset_index()

# convert dataframe list into acceptable format
all_participants_3D = sktime.datatypes.convert_to(all_participants, to_type="pd-multiindex")

# One seed for every stochastic step, so a fresh "Restart & Run All" prints the same report
RANDOM_STATE = 42

# Split data: 30% of the participants are held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    no_id_pts, pt_info_acc["ADHD"], test_size=0.3, random_state=RANDOM_STATE
)

# Find biggest length of data for padding
lengths = [len(pt) for pt in no_id_pts]

### THIS WORKS DON'T MESS WITH THIS LINE ###
# padded_clf = PaddingTransformer(pad_length=max(lengths)) * RandomIntervalClassifier(n_intervals=5)
# Pipeline: pad every recording to the longest one, then classify with a random forest.
# The forest is seeded; without random_state its trees differ from run to run.
padded_clf = PaddingTransformer(pad_length=max(lengths)) * RandomForestClassifier(
    n_estimators=100, random_state=RANDOM_STATE
)
padded_clf.fit(X_train, y_train)

# Model performance on the held-out participants
y_pred = padded_clf.predict(X_test)

report = sklearn.metrics.classification_report(y_test, y_pred)

print(report)

Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_with_controls\activity_data\patient_activity_01.csv
Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_with_controls\activity_data\patient_activity_02.csv
Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_with_controls\activity_data\patient_activity_03.csv
Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_with_controls\activity_data\patient_activity_05.csv
Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_with_controls\activity_data\patient_activity_07.csv
Reading C:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/hyperaktiv/hyperaktiv_with_controls/hyperaktiv_wi