# Select Trait-Matched Controls for WML Quantification

In [1]:
%%bash
pip install --upgrade pip -q && pip install opencv-python-headless -q

[0m

In [3]:
%%bash
cd wml
dx download ms_wml_quantification/wml_functions.py
dx download ms_wml_quantification/cohorts/*.csv

In [6]:
import pandas as pd

control_df = pd.read_csv('/opt/notebooks/control_table.csv')
fabry_df = pd.read_csv('/opt/notebooks/fabry_table.csv')

fabry_df

Unnamed: 0,Participant ID,Short axis heart images - DICOM | Instance 2,Sex,Age when attended assessment centre | Instance 2,T2 FLAIR structural brain images - NIFTI | Instance 2
0,1550259,1550259_20209_2_0.zip,Female,55,1550259_20253_2_0.zip
1,1811876,1811876_20209_2_0.zip,Female,57,1811876_20253_2_0.zip
2,2237128,2237128_20209_2_0.zip,Female,66,2237128_20253_2_0.zip
3,2794818,,Female,76,2794818_20253_2_0.zip
4,3485721,3485721_20209_2_0.zip,Male,47,3485721_20253_2_0.zip
5,3494046,3494046_20209_2_0.zip,Female,72,3494046_20253_2_0.zip
6,3516034,3516034_20209_2_0.zip,Male,62,3516034_20253_2_0.zip
7,3596843,3596843_20209_2_0.zip,Female,72,3596843_20253_2_0.zip
8,4386972,4386972_20209_2_0.zip,Female,54,4386972_20253_2_0.zip
9,4475226,4475226_20209_2_0.zip,Female,72,4475226_20253_2_0.zip


In [7]:
## match controls

from wml_functions import select_matching_patients

matches = select_matching_patients(fabry_df, control_df, nmatches=3, traits=[2, 3])

print('Matches:\t', ' '.join([str(m)+'_20253_2_0' for m in matches]))

Patients:	 15 
Matches each:	 3 
Traits:	 	 ['Sex', 'Age when attended assessment centre | Instance 2']
Matches:	 1010011_20253_2_0 1019658_20253_2_0 1026304_20253_2_0 1044451_20253_2_0 1048476_20253_2_0 1048788_20253_2_0 1005319_20253_2_0 1005545_20253_2_0 1008002_20253_2_0 1012751_20253_2_0 1057110_20253_2_0 1080119_20253_2_0 1691380_20253_2_0 1742481_20253_2_0 1942230_20253_2_0 1004915_20253_2_0 1007756_20253_2_0 1013102_20253_2_0 1036384_20253_2_0 1042080_20253_2_0 1087159_20253_2_0 1024913_20253_2_0 1040740_20253_2_0 1045317_20253_2_0 1052078_20253_2_0 1056396_20253_2_0 1058771_20253_2_0 1050731_20253_2_0 1053913_20253_2_0 1057528_20253_2_0 1001332_20253_2_0 1008017_20253_2_0 1008416_20253_2_0 1020244_20253_2_0 1026664_20253_2_0 1027142_20253_2_0 1031718_20253_2_0 1033288_20253_2_0 1033424_20253_2_0 1051418_20253_2_0 1065318_20253_2_0 1066209_20253_2_0 1016186_20253_2_0 1043118_20253_2_0 1127639_20253_2_0


In [16]:
## divide into batches for input into swiss army knife

for i, m in enumerate(matches):
    print(m)
    if (i+1) % 4 == 0:
        print('')

print((i+1)/4, 'batches')

1010011
1019658
1026304
1044451

1048476
1048788
1005319
1005545

1008002
1012751
1057110
1080119

1691380
1742481
1942230
1004915

1007756
1013102
1036384
1042080

1087159
1024913
1040740
1045317

1052078
1056396
1058771
1050731

1053913
1057528
1001332
1008017

1008416
1020244
1026664
1027142

1031718
1033288
1033424
1051418

1065318
1066209
1016186
1043118

1127639
11.25 batches


In [64]:
import zipfile

## check if all archives contain valid data

# fabry files
zips = fabry_df['T2 FLAIR structural brain images - NIFTI | Instance 2'].tolist()

rmindices = []
for z in zips:
    with zipfile.ZipFile('/mnt/project/Bulk/Brain MRI/T2 FLAIR/'+z[:2]+'/'+z, 'r') as archive:
        try:
            archive.read('T2_FLAIR/T2_FLAIR.nii.gz')
        except:
            print('!! Archive', z, 'does not contain neccessary data.')

'2237128_20253_2_0 3494046_20253_2_0 4885674_20253_2_0 5080697_20253_2_0'

In [65]:
import zipfile

## check if all archives contain valid data

# control files
zips = [str(m)+'_20253_2_0' for m in matches]

rmindices = []
for z in zips:
    with zipfile.ZipFile('/mnt/project/Bulk/Brain MRI/T2 FLAIR/'+z[:2]+'/'+z+'.zip', 'r') as archive:
        try:
            archive.read('T2_FLAIR/T2_FLAIR.nii.gz')
        except:
            print('!! Archive', z, 'does not contain neccessary data.')

' '.join([z.removesuffix('.zip') for z in zips])

'1005319_20253_2_0 1005545_20253_2_0 1008002_20253_2_0 1004915_20253_2_0 1007756_20253_2_0 1013102_20253_2_0 1001332_20253_2_0 1008017_20253_2_0 1008416_20253_2_0 1020244_20253_2_0 1026664_20253_2_0 1027142_20253_2_0'