In [32]:
from pathlib import Path
from datetime import date, timedelta
import re

from mri_data import file_manager as fm 

In [2]:
# Root directory of the 3T Pioneer dataset on the mounted drive.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
dataroot = Path("/mnt/h/3Tpioneer_bids")
# Index the dataset with the project helper. Later cells iterate over the
# result and query it per subject with .find_scan().
dataset = fm.scan_3Tpioneer_bids(dataroot)

In [5]:
# Unique subject IDs across every scan in the dataset, in ascending order.
subjects = sorted({scan.subid for scan in dataset})

(Scan(subid='1001', sesid='20170215', _dataroot=PosixPath('/mnt/h/3Tpioneer_bids'), _root=PosixPath('/mnt/h/3Tpioneer_bids/sub-ms1001/ses-20170215'), image=None, label=None, cond=None, id=20190385215),
 Scan(subid='1001', sesid='20180323', _dataroot=PosixPath('/mnt/h/3Tpioneer_bids'), _root=PosixPath('/mnt/h/3Tpioneer_bids/sub-ms1001/ses-20180323'), image=None, label=None, cond=None, id=20200503323),
 Scan(subid='1001', sesid='20200523', _dataroot=PosixPath('/mnt/h/3Tpioneer_bids'), _root=PosixPath('/mnt/h/3Tpioneer_bids/sub-ms1001/ses-20200523'), image=None, label=None, cond=None, id=20220723523))

In [20]:
def sesid_to_date(sesid: str) -> date:
    """Convert a session ID that starts with ``YYYYMMDD`` into a ``datetime.date``.

    Only the leading eight digits are read; any characters after them are
    ignored.

    Args:
        sesid: Session identifier such as ``"20170215"``.

    Returns:
        The calendar date encoded by the first eight digits of ``sesid``.

    Raises:
        ValueError: If ``sesid`` does not begin with eight digits, or if those
            digits do not form a valid calendar date.
    """
    sesid_match = re.match(r"(?P<yyyy>\d{4})(?P<mm>\d{2})(?P<dd>\d{2})", sesid)
    if sesid_match is None:
        # Without this guard the subscripts below fail with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(f"session ID {sesid!r} does not start with YYYYMMDD")
    return date(
        int(sesid_match["yyyy"]),
        int(sesid_match["mm"]),
        int(sesid_match["dd"]),
    )

In [49]:
# Number of scans found for each subject, in the same order as `subjects`.
nscans = [len(found) for found in map(dataset.find_scan, subjects)]

# Largest and mean scan count per subject.
max_nscans = max(nscans)
avg_nscan = sum(nscans) / len(nscans)

print(avg_nscan)
print(max_nscans)

2.786086956521739
10


In [48]:
# Longitudinal follow-up per subject: time between a subject's earliest and
# latest session. max()/min() over the parsed dates is used rather than
# scans[-1]/scans[0] so the result does not depend on the order in which
# find_scan() returns the scans.
followup_spans = []
for sub in subjects:
    scans = dataset.find_scan(sub)
    session_dates = [sesid_to_date(scan.sesid) for scan in scans]
    followup_spans.append(max(session_dates) - min(session_dates))

# Keep only subjects with a non-zero span (i.e. more than one distinct session
# date), expressed in whole days.
lon_interval = [span.days for span in followup_spans if span != timedelta(0)]
# Days -> years, using a 365-day year.
avg_interval = sum(lon_interval) / len(lon_interval) / 365
max_interval = max(lon_interval) / 365

In [70]:
# Session IDs for every scan in the dataset, and the dates they encode.
all_sesids = [scan.sesid for scan in dataset]
all_dates = list(map(sesid_to_date, all_sesids))

# Spot-check the first two parsed dates, confirm that date objects compare
# chronologically, and show the earliest session date in the dataset.
first_date, second_date = all_dates[0], all_dates[1]
print(first_date)
print(second_date)
print(first_date > second_date)
print(min(all_dates))

2017-02-15
2018-03-23
False
2016-09-02


## Clinical Characteristics

In [50]:
import pandas as pd

In [52]:
# Load the clinical table from the "Large" folder of the Secure_Data directory.
secure_data_dir = Path("/mnt/h/Secure_Data")
clinical_csv = secure_data_dir.joinpath("Large", "Clinical_Data_All.csv")
df = pd.read_csv(clinical_csv)

In [58]:
# Split the clinical table by the value recorded in the 'ms_type' column.
ms_type_col = df['ms_type']
df_rrms = df[ms_type_col == "RRMS"]
df_spms = df[ms_type_col == "SPMS"]
df_ppms = df[ms_type_col == "PPMS"]

# Total MS count is the sum of the three subtype groups; rows with any other
# 'ms_type' value are not counted.
ms_tot = sum(len(group) for group in (df_rrms, df_ppms, df_spms))
print("n MS: ", ms_tot)
print("n RRMS: ", len(df_rrms))
print("n SPMS: ", len(df_spms))
print("n PPMS: ", len(df_ppms))

n MS:  448
n RRMS:  361
n SPMS:  52
n PPMS:  35


In [65]:
# Pool the three MS subtype frames (RRMS, SPMS, PPMS order) and convert the
# 'dzdur' column to float in the same step.
df_ms = pd.concat([df_rrms, df_spms, df_ppms]).assign(
    dzdur=lambda frame: frame['dzdur'].map(float)
)

In [71]:
# Mean, minimum and maximum of the 'dzdur' column over the pooled MS frame.
# NOTE(review): the recorded output shows a negative minimum — confirm whether
# negative values are expected in this column.
dzdur_values = df_ms['dzdur']
print(dzdur_values.mean())
print(dzdur_values.min())
print(dzdur_values.max())

12.287590151089287
-2.077971856
54.88692986


In [67]:
# Fraction of rows in the full clinical table (`df`, not only the MS subset)
# whose 'sex' is "Female". The mean of the boolean mask equals count / len(df)
# and is computed vectorised instead of summing the Series element by element
# with the Python builtin.
(df['sex'] == "Female").mean()

0.7890070921985816