# Task1

In [1]:
import dropbox
import argparse
import pandas as pd
import numpy as np
from io import StringIO
from datetime import date, datetime, timedelta
import random

In [3]:
TOKEN = 'removed for privacy'
dbx = dropbox.Dropbox(TOKEN)
#download file
md, res = dbx.files_download('/recruitment_project_2/enroll_data.csv')

In [4]:
#read byte to string
s = str(res.content, 'utf-8')
data = StringIO(s)
#load string to dataframe
df = pd.read_csv(data)

In [5]:
df.head()

Unnamed: 0,site ID,date of consent,cohort,birth date
0,BWH,1/1/2020,CHR,1990-01-01
1,BWH,1/2/2020,CHR,1989-01-02
2,BWH,1/2/2020,HC,1998-01-03
3,BWH,1/2/2020,HC,1987-01-04
4,BWH,1/2/2020,CHR,1986-01-05


In [6]:
# find the ages of participant at date of consent

# transfer string columns (birthday and DoC) to datetime
birthd = pd.to_datetime(df['birth date'], format = '%Y-%m-%d')
constd = pd.to_datetime(df['date of consent'], format = '%m/%d/%Y')

# age in number of days at consent
age_in_days = (constd - birthd).dt.days
# age in years (i.e. 36.5 years old -> 36 years old)
df['age'] = np.floor(age_in_days / 365).astype(int)

df = df.drop('birth date', axis = 1)

In [7]:
# disguise date of conset

today = datetime.today()
strt_day = pd.to_datetime('1925-01-01')

# diff in days between 1925-01-01 and today
diff_days = (today - strt_day).days

# randomize num of days offset
days_offset = []
for i in range(0, df.shape[0]):
    days_offset += [random.randint(0, 30000) + diff_days]
    
# store days offset as dataframe
offset_df = pd.DataFrame(days_offset, columns = ['days_offset'])

# create a new column for DoC by finding number of days (offset) before today
df['date of consent'] = offset_df.days_offset.apply(lambda x: today - timedelta(x))
df['date of consent'] = pd.to_datetime(df['date of consent']).dt.strftime('%-m/%-d/%Y')

In [8]:
df

Unnamed: 0,site ID,date of consent,cohort,age
0,BWH,9/28/1865,CHR,30
1,BWH,10/9/1897,CHR,31
2,BWH,10/15/1894,HC,22
3,BWH,1/19/1886,HC,33
4,BWH,11/28/1908,CHR,34
...,...,...,...,...
7941,PNC,10/16/1844,CHR,21
7942,PNC,11/18/1888,HC,32
7943,PNC,9/30/1881,CHR,33
7944,PNC,1/16/1887,CHR,24


In [9]:
offset_df

Unnamed: 0,days_offset
0,57337
1,45638
2,46728
3,49919
4,41571
...,...
7941,64989
7942,48885
7943,51491
7944,49557


In [16]:
df.to_csv('output_files/enroll_data_anon_YT.csv', index = False)

#try uploading to temp folder in dropbox
with open('output_files/enroll_data_anon_YT.csv', 'rb') as f:    
    meta = dbx.files_upload(f.read(), '/temp/a.csv', mode=dropbox.files.WriteMode("overwrite"))

# Task 2

In [1]:
import pandas as pd
import nibabel as nib
import numpy as np
import re

In [15]:
# load and read registered data
reg = nib.load('brain_data/registered_Warped.nii.gz')
reg_data = reg.get_fdata()

In [16]:
reg_data.shape

(182, 218, 182)

In [17]:
#load and read label data
label = nib.load('brain_data/atlas-integer-labels.nii.gz')
label_data = label.get_fdata()

In [18]:
label_data.shape

(182, 218, 182)

In [19]:
# find unique labels
values, counts = np.unique(label_data, return_counts=True)

In [23]:
# for each label, binarize the atlas in label region and multiply to registered image
# save label-volume combinations to a dataframe
brain_v = []
for i in values[1:]:
    binary_label = label_data == i
    volume = np.count_nonzero(binary_label * reg_data)
    brain_v += [[i, volume]]

In [24]:
brain_v_df = pd.DataFrame(brain_v, columns = ['label', 'volume'])

In [25]:
brain_v_df.head()

Unnamed: 0,label,volume
0,1.0,15644
1,2.0,1500
2,3.0,8851
3,4.0,13711
4,5.0,12729


In [26]:
# open the raw FreeSurfer text downloaded directly from the web
# extract only lines start with numbers 
with open('FS_Raw.txt', 'r') as f1:
    with open('FS.txt', 'w') as f2:
        for line in f1:
            if re.match(r'^\d+', line):
                f2.write(line)

In [27]:
# read the cleaned txt into dataframe
fs_df = pd.read_csv('FS.txt', delim_whitespace=True, header = None)
# keep first two columns
brain_label = fs_df.iloc[1:, 0:2].set_axis(['label', 'name'], axis = 1)
brain_label.head()

Unnamed: 0,label,name
1,1,Left-Cerebral-Exterior
2,2,Left-Cerebral-White-Matter
3,3,Left-Cerebral-Cortex
4,4,Left-Lateral-Ventricle
5,5,Left-Inf-Lat-Vent


In [28]:
#join free surfer dataframe with the brain volume dataframe on label
result = pd.merge(brain_label, brain_v_df, how="right", on=['label'])

In [29]:
result.head()

Unnamed: 0,label,name,volume
0,1,Left-Cerebral-Exterior,15644
1,2,Left-Cerebral-White-Matter,1500
2,3,Left-Cerebral-Cortex,8851
3,4,Left-Lateral-Ventricle,13711
4,5,Left-Inf-Lat-Vent,12729


In [30]:
result.to_csv('brain_region_volume_YT.csv', index = False)