In [1]:
# ADHD Classification Based on Activity
# Purpose: Determine whether activity can predict diagnosis of ADHD
# Author: Alexander Maksiaev

In [54]:
# Housekeeping

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

# Directories
main_dir = "c:/Users/maksi/Documents/Statistics/Projects/Movement_Mental_Health/"

adhd_dir = os.path.join(main_dir, "hyperaktiv/")
activity_dir = os.path.join(adhd_dir, "activity_data/")
hrv_dir = os.path.join(adhd_dir, "hrv_data/")
controls_dir = os.path.join(adhd_dir, "hyperaktiv_with_controls/hyperaktiv_with_controls/")

os.chdir(adhd_dir)

# Main files
patient_info = pd.read_csv("patient_info.csv", delimiter=";")
features = pd.read_csv("features.csv", delimiter=";")
patient_info
# print(features)

os.chdir(activity_dir)
activity_data = pd.read_csv("patient_activity_01.csv", delimiter=";")
activity_data

# os.chdir(hrv_dir)
# hrv_data = pd.read_csv("patient_hr_1.csv", delimiter=";")
# hrv_data

Unnamed: 0,TIMESTAMP,ACTIVITY
0,02-23-2009 16:00,0
1,02-23-2009 16:01,195
2,02-23-2009 16:02,240
3,02-23-2009 16:03,209
4,02-23-2009 16:04,202
...,...,...
8631,03-01-2009 15:51,0
8632,03-01-2009 15:52,0
8633,03-01-2009 15:53,0
8634,03-01-2009 15:54,131


In [53]:
# Data initialization

os.chdir(controls_dir)

controls_info = pd.read_csv("patient_info.csv", delimiter=";") # Includes patients as well
# print(controls_info)

adhd_patients = controls_info[controls_info["ADHD"] == 1.0] # ADHD only
# print(adhd_patients)
# print(controls_info.columns)

controls = controls_info[np.sum(controls_info.loc[:, "ADHD":"OTHER"], axis=1) == 0] # Neurotypicals only
non_adhd_controls = controls_info[controls_info["ADHD"] == 0] # Non-ADHD only
# print(controls)
# print(non_adhd_controls)


      ID  SEX  AGE  ACC  ACC_TIME ACC_DAYS  HRV  HRV_TIME  HRV_HOURS  CPT_II  \
0      1    0    3    1  16:00:00        6    1  11:00:00       21.0       0   
1      2    0    4    1  10:54:00      6.8    0       NaN        NaN       1   
2      3    1    2    1  15:28:00      7.2    1  15:25:00       21.0       1   
3      4    1    3    0       NaN      NaN    1  16:55:00       22.0       1   
4      5    1    1    1  14:24:00      5.9    1  16:00:00       12.0       1   
..   ...  ...  ...  ...       ...      ...  ...       ...        ...     ...   
129  236    1    2    1   9:30:00     13,3    0       NaN        NaN       0   
130  237    0    1    1  15:00:00       14    0       NaN        NaN       0   
131  238    0    4    1   9:00:00     14,6    0       NaN        NaN       0   
132  239    0    4    1   9:00:00     14,3    0       NaN        NaN       0   
133  240    1    1    1   9:00:00     14,6    0       NaN        NaN       0   

     ...  HADS_D  MED  MED_Antidepr  ME

In [55]:
# Pre-processing: Activity

# Pts with motor activity recordings
activity_data = controls_info[controls_info["ACC"] == 1]
activity_data

# Each pt has their own csv file...


Unnamed: 0,ID,SEX,AGE,ACC,ACC_TIME,ACC_DAYS,HRV,HRV_TIME,HRV_HOURS,CPT_II,...,HADS_D,MED,MED_Antidepr,MED_Moodstab,MED_Antipsych,MED_Anxiety_Benzo,MED_Sleep,MED_Analgesics_Opioids,MED_Stimulants,filter_$
0,1,0,3,1,16:00:00,6,1,11:00:00,21.0,0,...,2.0,1.0,1.0,,,,,,,1.0
1,2,0,4,1,10:54:00,6.8,0,,,1,...,7.0,0.0,,,,,,,,1.0
2,3,1,2,1,15:28:00,7.2,1,15:25:00,21.0,1,...,0.0,0.0,,,,,,,,1.0
4,5,1,1,1,14:24:00,5.9,1,16:00:00,12.0,1,...,5.0,0.0,,,,,,,,1.0
5,7,0,3,1,14:30:00,8,1,10:20:00,21.0,1,...,0.0,0.0,,,,,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,236,1,2,1,9:30:00,133,0,,,0,...,,,,,,,,,,
130,237,0,1,1,15:00:00,14,0,,,0,...,,,,,,,,,,
131,238,0,4,1,9:00:00,146,0,,,0,...,,,,,,,,,,
132,239,0,4,1,9:00:00,143,0,,,0,...,,,,,,,,,,


In [52]:
# Splitting into training set and testing set

# Don't let the machine know which ones are ADHD and which ones aren't
X = controls_info.drop("ADHD", axis=1)
y = controls_info["ADHD"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, stratify = y, random_state=2025)
