In [1]:
import pandas as pd

In [88]:
import numpy as np

In [135]:
# Read the four input tables from CSV files in the working directory.
base_imaging_data, st_olav_36, freesurfer, dwi_segmentation = map(
    pd.read_csv,
    (
        'imaging_data.csv',
        'st_olav_36.csv',
        'freesurfer_and_bianca_results.csv',
        'DWI_segmentations.csv',
    ),
)

In [48]:
# Preview the first rows of st_olav_36.
st_olav_36.head()

Unnamed: 0.1,Unnamed: 0,subject-id,stolav_36,timepoint,Type of imaging
0,0,sub-400639,True,36-mon-follow-up,MR
1,1,sub-400460,True,36-mon-follow-up,MR
2,2,sub-400778,True,36-mon-follow-up,MR
3,3,sub-400698,True,36-mon-follow-up,MR
4,4,sub-400175,True,36-mon-follow-up,MR


In [47]:
# Tag every row of st_olav_36 with the 36-month follow-up session and MR as
# the imaging type (both columns are filled with a single constant).
st_olav_36['timepoint'] = '36-mon-follow-up'
st_olav_36['Type of imaging'] = 'MR'

In [9]:
def covert_modality(modality):
    """Collapse a detailed modality label into a coarse imaging type.

    Parameters
    ----------
    modality : str
        Modality label, e.g. ``'CT-1'`` or ``'DWI'``.

    Returns
    -------
    str or None
        ``'CT'`` when the label contains the substring ``'CT'``, ``'MR'`` for
        any other string, and ``None`` when ``modality`` is not a string
        (for example the NaN that pandas uses for a missing value).
    """
    # Missing values arrive as float NaN (or None); a substring test on them
    # raises TypeError, so report them as missing instead of aborting the
    # whole column-wise apply.
    if not isinstance(modality, str):
        return None
    return 'CT' if 'CT' in modality else 'MR'

In [49]:
# Keep only the columns needed to append the St. Olav rows to the imaging table.
st_36 = st_olav_36.loc[
    :,
    ['subject-id', 'timepoint', 'Type of imaging'],
]

In [35]:
def define_hosp(subject_id):
    """Look up the hospital name for a subject from its ID prefix.

    The first seven characters of ``subject_id`` (e.g. ``'sub-400'``) select
    the hospital. Prefixes that are not listed yield ``None``.
    """
    hospital_by_prefix = {
        'sub-100': 'Ullevål',
        'sub-221': 'Bærum',
        'sub-400': 'St. Olav',
        'sub-300': 'Haukeland',
        'sub-410': 'Ålesund',
    }
    site_prefix = subject_id[:7]
    return hospital_by_prefix.get(site_prefix)

In [52]:
# Stack the St. Olav rows (subject-id, timepoint, Type of imaging) below the
# base imaging table.
# NOTE(review): merged_imaging_data is not referenced again in the cells
# visible here, and the final export writes base_imaging_data — confirm
# whether these rows were meant to end up in the exported table.
merged_imaging_data = pd.concat([base_imaging_data, st_36])

In [148]:
# Derive the hospital name for each row from the prefix of its subject-id.
base_imaging_data['Hospital'] = base_imaging_data['subject-id'].apply(define_hosp)

In [28]:
# Derive the coarse imaging type ('CT' or 'MR') from each row's modality label.
base_imaging_data['Type of imaging'] = base_imaging_data['modality'].apply(covert_modality)

In [41]:
# Distinct hospitals that have at least one 36-month follow-up row.
base_imaging_data.loc[
    base_imaging_data['timepoint'] == '36-mon-follow-up',
    'Hospital',
].unique()

array(['Bærum'], dtype=object)

In [39]:
# List the distinct values of the Hospital column (None marks subject-id
# prefixes that define_hosp does not recognise).
base_imaging_data['Hospital'].unique()

array(['Ullevål', 'St. Olav', 'Bærum', 'Haukeland', None, 'Ålesund'],
      dtype=object)

In [53]:
# Preview the first rows of the FreeSurfer/BIANCA results table.
freesurfer.head()

Unnamed: 0.1,Unnamed: 0,pno,time,hosp,wmh_vol_bianca_ml_t0,freesurfer_t0,freesurfer_t2,freesurfer_t3,subject-id,hosp-id,timepoint
0,0,1,0,St. Olavs Hospital,,,,,sub-400001,sub-400,ses-study-MR
1,1,1,1,St. Olavs Hospital,,,,,sub-400001,sub-400,3-mon-follow-up
2,2,1,2,St. Olavs Hospital,,,,,sub-400001,sub-400,18-mon-follow-up
3,3,2,0,St. Olavs Hospital,,,,,sub-400002,sub-400,ses-study-MR
4,4,2,1,St. Olavs Hospital,,,,,sub-400002,sub-400,3-mon-follow-up


In [54]:
# Keep the identifying columns plus the four postprocessing result columns.
free_df = freesurfer.loc[
    :,
    [
        'subject-id',
        'timepoint',
        'wmh_vol_bianca_ml_t0',
        'freesurfer_t0',
        'freesurfer_t2',
        'freesurfer_t3',
    ],
]

In [71]:
# Save the selected FreeSurfer/BIANCA columns to CSV.
# The row index is written too (to_csv default), which shows up as an
# unnamed first column when the file is read back.
# NOTE(review): the file name is spelled 'fresesurfer' — confirm downstream
# readers before renaming it.
free_df.to_csv('fresesurfer_sorted.csv')

In [72]:
# Reshape to long format: one row per (subject, timepoint, result column),
# with the source column name in 'Postprocessing type' and its value in
# 'Postprocessing done'.
postprocess = free_df.melt(
    id_vars=['subject-id', 'timepoint'],
    value_vars=[
        'wmh_vol_bianca_ml_t0',
        'freesurfer_t0',
        'freesurfer_t2',
        'freesurfer_t3',
    ],
    var_name='Postprocessing type',
    value_name='Postprocessing done',
)

In [73]:
# Copy the result column name into 'Postprocessing' only where a value exists;
# rows without a value get NaN.
postprocess['Postprocessing'] = postprocess['Postprocessing type'].where(
    postprocess['Postprocessing done'].notna()
)

In [109]:
def correct_timepoint(row):
    """Return the session label implied by a row's postprocessing column name.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'Postprocessing'``; ``'timepoint'`` is used as the
        fallback when present.

    Returns
    -------
    str or None
        ``'ses-study-MR'`` / ``'18-mon-follow-up'`` / ``'36-mon-follow-up'``
        when the label contains ``'t0'`` / ``'t2'`` / ``'t3'`` respectively.
        Otherwise the row's current ``'timepoint'`` is returned unchanged
        (``None`` if the row has no such entry), so assigning the result back
        to the ``timepoint`` column does not erase rows that need no
        correction.
    """
    label = row['Postprocessing']
    if not pd.isnull(label):
        # Checked in this order; the first marker found in the label wins.
        suffix_to_session = (
            ('t0', 'ses-study-MR'),
            ('t2', '18-mon-follow-up'),
            ('t3', '36-mon-follow-up'),
        )
        for suffix, session in suffix_to_session:
            if suffix in label:
                return session
    # Nothing to correct: keep whatever timepoint the row already carries.
    return row.get('timepoint')

In [113]:
# Show the label and timepoint of the rows that have a postprocessing result.
postprocess.loc[postprocess['Postprocessing'].notna(), ['Postprocessing', 'timepoint']]

Unnamed: 0,Postprocessing,timepoint
66,wmh_vol_bianca_ml_t0,ses-study-MR
67,wmh_vol_bianca_ml_t0,ses-study-MR
68,wmh_vol_bianca_ml_t0,ses-study-MR
101,wmh_vol_bianca_ml_t0,ses-study-MR
102,wmh_vol_bianca_ml_t0,ses-study-MR
...,...,...
10545,freesurfer_t3,36-mon-follow-up
10597,freesurfer_t3,36-mon-follow-up
10598,freesurfer_t3,36-mon-follow-up
10599,freesurfer_t3,36-mon-follow-up


In [110]:
# Recompute each row's timepoint from its postprocessing label.
postprocess['timepoint'] = postprocess.apply(correct_timepoint, axis=1)

In [128]:
# One row per distinct (subject, timepoint, postprocessing label), restricted
# to rows that actually have a postprocessing result.
freesurfer_cleaned_results = (
    postprocess
    .dropna(subset=['Postprocessing'])
    [['subject-id', 'timepoint', 'Postprocessing']]
    .drop_duplicates()
)

In [129]:
# Display the de-duplicated postprocessing rows.
freesurfer_cleaned_results

Unnamed: 0,subject-id,timepoint,Postprocessing
66,sub-400022,ses-study-MR,wmh_vol_bianca_ml_t0
101,sub-400033,ses-study-MR,wmh_vol_bianca_ml_t0
112,sub-400036,ses-study-MR,wmh_vol_bianca_ml_t0
119,sub-400038,ses-study-MR,wmh_vol_bianca_ml_t0
130,sub-400042,ses-study-MR,wmh_vol_bianca_ml_t0
...,...,...,...
10460,sub-400805,36-mon-follow-up,freesurfer_t3
10479,sub-400812,36-mon-follow-up,freesurfer_t3
10524,sub-400826,36-mon-follow-up,freesurfer_t3
10542,sub-400833,36-mon-follow-up,freesurfer_t3


In [130]:
# Append the postprocessing rows (subject-id, timepoint, Postprocessing) below
# the imaging table; columns present on only one side are filled with NaN.
# The original row labels are kept, so index values can repeat after this step.
base_imaging_data = pd.concat([base_imaging_data, freesurfer_cleaned_results])

In [132]:
def convert_postprocessing(row):
    """Map a raw postprocessing column name to the name of the tool.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'Postprocessing'``.

    Returns
    -------
    str or None
        ``'Bianca'`` when the label contains ``'wmh'``, ``'Freesurfer'`` when
        it contains ``'freesurfer'``, ``None`` when the label is missing.
        Any other label is returned unchanged, so values that are already
        converted (or that come from another pipeline) survive a repeated
        run of the conversion instead of being replaced by ``None``.
    """
    label = row['Postprocessing']
    if pd.isnull(label):
        return None
    if 'wmh' in label:
        return 'Bianca'
    if 'freesurfer' in label:
        return 'Freesurfer'
    # Unrecognised or already-converted label: keep it as it is.
    return label

In [137]:
# Label every DWI segmentation row with a fixed postprocessing name.
dwi_segmentation['Postprocessing'] = 'DWI stroke lesion segmentation'

In [139]:
# Assign every DWI segmentation row to the 'ses-study-MR' session.
dwi_segmentation['timepoint'] = 'ses-study-MR'

In [142]:
# Display base_imaging_data with the DWI segmentation rows appended.
# NOTE(review): the concatenated frame is only displayed, not assigned, so
# these rows are not part of base_imaging_data afterwards — confirm whether
# that is intended.
pd.concat([base_imaging_data, dwi_segmentation[['subject-id', 'timepoint', 'Postprocessing']]])


Unnamed: 0,subject-id,timepoint,modality,file-name,file-path,examintaion,order,Postprocessing
0,sub-100426,ses-clinical,CT-1,sub-100426_ses-clinical_CT-1_Tilt_1,../the_complete_NorCOAST_dataset/sub-100426/se...,1,1.0,
1,sub-100426,ses-clinical,CT-1,sub-100426_ses-clinical_CT-1,../the_complete_NorCOAST_dataset/sub-100426/se...,CT-1,1.0,
2,sub-100426,ses-clinical,CT-1,sub-100426_ses-clinical_CT-1,../the_complete_NorCOAST_dataset/sub-100426/se...,CT-1,1.0,
3,sub-100426,18-mon-follow-up,DWI,sub-100426_18-month-follow-up_DWI,../the_complete_NorCOAST_dataset/sub-100426/18...,DWI,4.0,
4,sub-100426,18-mon-follow-up,DWI,sub-100426_18-month-follow-up_DWI,../the_complete_NorCOAST_dataset/sub-100426/18...,DWI,4.0,
...,...,...,...,...,...,...,...,...
269,sub-400573,ses-study-MR,,,,,,DWI stroke lesion segmentation
270,sub-400754,ses-study-MR,,,,,,DWI stroke lesion segmentation
271,sub-300270,ses-study-MR,,,,,,DWI stroke lesion segmentation
272,sub-400286,ses-study-MR,,,,,,DWI stroke lesion segmentation


In [143]:
# Load the cognition table from CSV.
cognition = pd.read_csv('cognition_table.csv')

In [144]:
# Preview the first rows of the cognition table.
cognition.head()

Unnamed: 0.1,Unnamed: 0,pNo,Hosp,cognitive_status_model_b_T1,cognitive_status_model_b_T2,subject-id,Cognitive testing
0,0,1,St. Olavs Hospital,0.0,1.0,sub-400001,Cognitive testing T1
1,1,2,St. Olavs Hospital,2.0,1.0,sub-400002,Cognitive testing T1
2,2,3,St. Olavs Hospital,1.0,0.0,sub-400003,Cognitive testing T1
3,3,4,St. Olavs Hospital,0.0,,sub-400004,Cognitive testing T1
4,4,5,St. Olavs Hospital,,,sub-400005,


In [146]:
# Append the selected cognition columns as additional rows below the imaging
# table (a row-wise concat, not a join on subject-id); columns present on only
# one side are filled with NaN.
base_imaging_data = pd.concat([base_imaging_data, cognition[['subject-id', 'Cognitive testing', 'cognitive_status_model_b_T1','cognitive_status_model_b_T2']]])

In [133]:
# Replace the raw postprocessing column names with tool names, row by row.
base_imaging_data['Postprocessing'] = base_imaging_data.apply(
    convert_postprocessing,
    axis=1,
)

In [149]:
# Write the combined table to CSV without the row index.
base_imaging_data.to_csv('merged_imaging_data.csv', index=False)