In [1]:
import pandas as pd
import numpy as np

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas parser warnings raised while reading the CSV.
import warnings
warnings.simplefilter('ignore')

# Location of the raw export, relative to this notebook.
csv_file = '../ukb52305.csv'

# Read only the first 100 rows: a small preview whose column names are used to
# pick the fields to load from the full file.
data = pd.read_csv(filepath_or_buffer=csv_file, nrows=100)

In [2]:
# Trail making test: summary columns of interest.
#
# Related fields that are also available but are not listed below:
#   20149 -> interval between previous point and current one in numeric path
#   20155 -> interval between previous point and current one in alphanumeric path
#   20147 -> errors before selecting correct item in numeric path (trail #1)
#   20148 -> errors before selecting correct item in alphanumeric path (trail #2)
#            (these four are array-valued; NOTE(review): the array length has been
#            recorded inconsistently as 24 or 25 -- confirm before relying on it)
#   20246 -> trail making completion status
#   20136 -> when trail making test completed
trail_making = [
    f'{field_id}-0.0'
    for field_id in (
        '20156',  # sum of 20149
        '20157',  # sum of 20155
        '20247',  # total errors traversing numeric path; only participants with errors > 0 have a value
        '20248',  # total errors traversing alphanumeric path; only participants with errors > 0 have a value
    )
]

In [3]:
# Field IDs to load. Column names in the CSV have the form '<field id>-<suffix>'
# (e.g. '20156-0.0'), so one field ID may map to several columns.
cols = ['20156', '20157', '20247', '20248']

# Always keep the participant identifier, then add every column whose field ID
# (the text before the first '-') equals one of the requested IDs.
# An exact comparison is used instead of a substring test so that a column that
# merely contains the digits (e.g. '120156-0.0') is not picked up by mistake.
# The result is ordered by requested field ID, then by position in the CSV header.
to_retrieve = ['eid'] + [
    data_col
    for col in cols
    for data_col in data.columns
    if data_col.split('-', 1)[0] == col
]

print(f"Found {len(to_retrieve)} columns to be retrieved: {to_retrieve}")

# Second pass over the full file, restricted to the selected columns.
trail_making_df = pd.read_csv(csv_file, usecols=to_retrieve)

Found 5 columns to be retrieved: ['eid', '20156-0.0', '20157-0.0', '20247-0.0', '20248-0.0']


In [4]:
# Columns holding the two completion durations and the two error totals.
duration_cols = ['20156-0.0', '20157-0.0']
error_cols = ['20247-0.0', '20248-0.0']

# Drop participants who did not take the trail making test: keep a row only if
# BOTH durations (20156 and 20157) are present. how='any' drops a row as soon as
# one of the two is missing, so a missing duration is never zero-filled below.
data = trail_making_df.dropna(subset=duration_cols, how='any')

# A missing error total means the person made 0 mistakes during the test, so
# fill only the error columns; passing a dict limits fillna to those columns.
data = data.fillna({error_col: 0 for error_col in error_cols})

In [5]:
# Show the filtered trail making table as the cell's rich output
# (long frames are rendered truncated, with '...' between the first and last rows).
data

Unnamed: 0,eid,20156-0.0,20157-0.0,20247-0.0,20248-0.0
9,1000085,43.202,64.591,0.0,0.0
15,1000158,48.845,96.175,3.0,5.0
25,1000251,23.656,36.673,0.0,0.0
27,1000279,25.344,51.384,0.0,1.0
29,1000295,33.183,52.936,0.0,0.0
...,...,...,...,...,...
502376,6024594,48.132,59.984,3.0,0.0
502385,6024683,53.382,57.337,5.0,0.0
502395,6024781,24.867,44.819,0.0,0.0
502399,6024822,47.510,71.331,0.0,0.0


In [6]:
# Write the table to disk without the row index, so the file starts at 'eid'.
data.to_csv(path_or_buf='../data/trail_making.csv', index=False)