In [1]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import json

In [2]:
basic_info_df = pd.read_csv('./raw_data/pads-parkinsons-disease-smartwatch-dataset-1.0.0/preprocessed/file_list.csv')

In [3]:
basic_info_df.head(5)

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1


In [4]:
basic_info_df.shape

(469, 15)

In [5]:
json_files = list(Path('./raw_data/pads-parkinsons-disease-smartwatch-dataset-1.0.0/questionnaire/').glob("*.json"))
print(f"Found {len(json_files)} JSON files")

Found 469 JSON files


In [6]:
questionnaire_data = []

In [8]:
for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            
            subject_id = data.get('subject_id')
            study_id = data.get('study_id')
            questionnaire_name = data.get('questionnaire_name')
            questionnaire_id = data.get('id')
            
            
            row_data = {
                'subject_id': subject_id,
                'study_id': study_id,
                'questionnaire_name': questionnaire_name,
                'questionnaire_id': questionnaire_id
            }
            
           
            items = data.get('item', [])
            for item in items:
                link_id = item.get('link_id')
                answer = item.get('answer')
                
                
                row_data[link_id] = answer
                
            
            questionnaire_data.append(row_data)
            
        except json.JSONDecodeError as e:
            print(f"JSON parsing error in {json_file}: {e}")
        except Exception as e:
            print(f"Error processing {json_file}: {e}")

In [9]:
questionnaire_data

[{'subject_id': '280',
  'study_id': 'PADS',
  'questionnaire_name': 'NMS',
  'questionnaire_id': 'Non-motor Symptoms',
  '01': False,
  '02': False,
  '03': True,
  '04': False,
  '05': True,
  '06': False,
  '07': True,
  '08': True,
  '09': False,
  '10': True,
  '11': False,
  '12': False,
  '13': False,
  '14': False,
  '15': False,
  '16': False,
  '17': False,
  '18': False,
  '19': False,
  '20': False,
  '21': False,
  '22': False,
  '23': False,
  '24': True,
  '25': False,
  '26': False,
  '27': False,
  '28': True,
  '29': False,
  '30': False},
 {'subject_id': '146',
  'study_id': 'PADS',
  'questionnaire_name': 'NMS',
  'questionnaire_id': 'Non-motor Symptoms',
  '01': False,
  '02': True,
  '03': False,
  '04': False,
  '05': False,
  '06': False,
  '07': False,
  '08': True,
  '09': True,
  '10': False,
  '11': False,
  '12': True,
  '13': False,
  '14': False,
  '15': False,
  '16': False,
  '17': False,
  '18': True,
  '19': True,
  '20': False,
  '21': False,
  '22':

In [10]:
df = pd.DataFrame(questionnaire_data)

In [11]:
df

Unnamed: 0,subject_id,study_id,questionnaire_name,questionnaire_id,01,02,03,04,05,06,...,21,22,23,24,25,26,27,28,29,30
0,280,PADS,NMS,Non-motor Symptoms,False,False,True,False,True,False,...,False,False,False,True,False,False,False,True,False,False
1,146,PADS,NMS,Non-motor Symptoms,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,453,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,003,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,392,PADS,NMS,Non-motor Symptoms,True,False,True,False,False,False,...,True,False,False,False,False,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,169,PADS,NMS,Non-motor Symptoms,False,False,False,False,True,False,...,False,True,False,True,False,True,True,True,False,False
465,186,PADS,NMS,Non-motor Symptoms,True,False,True,True,True,False,...,True,True,False,True,False,False,True,True,True,True
466,352,PADS,NMS,Non-motor Symptoms,False,True,True,False,False,False,...,False,False,True,True,False,True,True,False,False,False
467,217,PADS,NMS,Non-motor Symptoms,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


In [13]:
basic_info_df.columns.tolist()

['resource_type',
 'id',
 'study_id',
 'condition',
 'disease_comment',
 'age_at_diagnosis',
 'age',
 'height',
 'weight',
 'gender',
 'handedness',
 'appearance_in_kinship',
 'appearance_in_first_grade_kinship',
 'effect_of_alcohol_on_tremor',
 'label']

In [12]:
df['subject_id'] = df['subject_id'].astype(int)

In [14]:
df

Unnamed: 0,subject_id,study_id,questionnaire_name,questionnaire_id,01,02,03,04,05,06,...,21,22,23,24,25,26,27,28,29,30
0,280,PADS,NMS,Non-motor Symptoms,False,False,True,False,True,False,...,False,False,False,True,False,False,False,True,False,False
1,146,PADS,NMS,Non-motor Symptoms,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,453,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,3,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,392,PADS,NMS,Non-motor Symptoms,True,False,True,False,False,False,...,True,False,False,False,False,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,169,PADS,NMS,Non-motor Symptoms,False,False,False,False,True,False,...,False,True,False,True,False,True,True,True,False,False
465,186,PADS,NMS,Non-motor Symptoms,True,False,True,True,True,False,...,True,True,False,True,False,False,True,True,True,True
466,352,PADS,NMS,Non-motor Symptoms,False,True,True,False,False,False,...,False,False,True,True,False,True,True,False,False,False
467,217,PADS,NMS,Non-motor Symptoms,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


In [15]:
basic_info_df

Unnamed: 0,resource_type,id,study_id,condition,disease_comment,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,patient,1,PADS,Healthy,-,56,56,173,78,male,right,True,True,Unknown,0
1,patient,2,PADS,Other Movement Disorders,Left-Sided resting tremor and hypokinesia with...,69,81,193,104,male,right,False,,No effect,2
2,patient,3,PADS,Healthy,-,45,45,170,78,female,right,False,,Unknown,0
3,patient,4,PADS,Parkinson's,IPS akinetic-rigid type,63,67,161,90,female,right,False,,No effect,1
4,patient,5,PADS,Parkinson's,IPS tremordominant type,65,75,172,86,male,left,False,,Unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,patient,465,PADS,Parkinson's,IPS mixed type,62,65,175,80,male,right,True,False,No effect,1
465,patient,466,PADS,Healthy,-,84,84,172,74,female,right,True,True,No effect,0
466,patient,467,PADS,Parkinson's,"Essential Tremor, starting IPS tremordominant ...",55,57,190,100,male,right,False,,Improvement,1
467,patient,468,PADS,Parkinson's,IPS mixed type,73,76,198,118,male,right,False,,No effect,1


In [16]:
basic_info_df['id'] = basic_info_df['id'].astype(int)

In [20]:
merged_df = pd.merge(df, basic_info_df, left_on='subject_id', right_on='id', how='inner')

In [18]:
basic_info_df.dtypes

resource_type                        object
id                                    int64
study_id                             object
condition                            object
disease_comment                      object
age_at_diagnosis                      int64
age                                   int64
height                                int64
weight                                int64
gender                               object
handedness                           object
appearance_in_kinship                  bool
appearance_in_first_grade_kinship    object
effect_of_alcohol_on_tremor          object
label                                 int64
dtype: object

In [19]:
df.dtypes

subject_id             int64
study_id              object
questionnaire_name    object
questionnaire_id      object
01                      bool
02                      bool
03                      bool
04                      bool
05                      bool
06                      bool
07                      bool
08                      bool
09                      bool
10                      bool
11                      bool
12                      bool
13                      bool
14                      bool
15                      bool
16                      bool
17                      bool
18                      bool
19                      bool
20                      bool
21                      bool
22                      bool
23                      bool
24                      bool
25                      bool
26                      bool
27                      bool
28                      bool
29                      bool
30                      bool
dtype: object

In [21]:
merged_df

Unnamed: 0,subject_id,study_id_x,questionnaire_name,questionnaire_id,01,02,03,04,05,06,...,age_at_diagnosis,age,height,weight,gender,handedness,appearance_in_kinship,appearance_in_first_grade_kinship,effect_of_alcohol_on_tremor,label
0,280,PADS,NMS,Non-motor Symptoms,False,False,True,False,True,False,...,73,77,162,65,female,right,False,,Unknown,2
1,146,PADS,NMS,Non-motor Symptoms,False,True,False,False,False,False,...,68,71,186,89,male,right,True,False,Improvement,1
2,453,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,0,48,164,59,female,right,False,False,Unknown,2
3,3,PADS,NMS,Non-motor Symptoms,False,False,False,False,False,False,...,45,45,170,78,female,right,False,,Unknown,0
4,392,PADS,NMS,Non-motor Symptoms,True,False,True,False,False,False,...,48,55,165,64,female,right,False,,Improvement,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,169,PADS,NMS,Non-motor Symptoms,False,False,False,False,True,False,...,51,56,180,82,female,right,True,True,Unknown,1
465,186,PADS,NMS,Non-motor Symptoms,True,False,True,True,True,False,...,60,65,160,73,female,right,False,,No effect,1
466,352,PADS,NMS,Non-motor Symptoms,False,True,True,False,False,False,...,45,55,172,80,female,right,True,False,No effect,1
467,217,PADS,NMS,Non-motor Symptoms,True,False,False,False,False,False,...,65,73,182,85,male,right,False,,No effect,1


In [22]:
merged_df.to_csv('merged_df.csv')