In [1]:
%matplotlib inline

from glob import glob
import os
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import numpy as np
import subtle.utils.experiment as su_exp
import subtle.utils.io as suio
from plot_grid import plot_h5

plt.set_cmap('gray')
plt.rcParams['figure.figsize'] = (12, 10)

def has_sequence(case_dir, kw):
    ser_dirs = sorted([d for d in glob('{}/*'.format(case_dir)) if os.path.isdir(d)])
    has_seq = [d for d in ser_dirs if kw in d.lower()]
    return len(has_seq) > 0

def get_dirs_with_kw(case_dir, kw):
    ser_dirs = sorted([d for d in glob('{}/*'.format(case_dir)) if os.path.isdir(d)])
    return sorted([d for d in ser_dirs if kw in d.lower()])



In [None]:
base_path = '/mnt/datasets/ben/backup/raid/SubtleGAD_Stanford/Siemens'
cases = sorted([f.split('/')[-1] for f in glob('{}/Patient*'.format(base_path))])
dest_path = '/home/srivathsa/projects/studies/gad/stanford/data'

In [None]:
# kws = ['ax_bravo', 'ax_t2', 'flair']
kws = ['mprage']
cases = ['Patient_0203']
for cnum in tqdm(cases, total=len(cases)):
    case_dirs = [
        d for d in glob('{}/{}/*'.format(base_path, cnum)) 
        if os.path.isdir(d)
    ]
    
    kw_dirs = []
    for kw in kws:
        mlist = []
        for d in case_dirs:
            if kw in d.lower() and 'ax' in d.lower():
                mlist.append(d)
        kw_dirs.extend(mlist)
    kw_dirs = list(set(kw_dirs))
    sym_dest = '{}/{}'.format(dest_path, cnum)

    if not os.path.exists(sym_dest):
        os.makedirs(sym_dest)
    for ser in kw_dirs:
        os.symlink(ser, '{}/{}'.format(sym_dest, ser.split('/')[-1]))

In [None]:
base_path = '/home/srivathsa/projects/studies/gad/stanford/data'
cases = sorted([f.split('/')[-1] for f in glob('{}/Patient*'.format(base_path))])

pp_base = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data'

fl_cases = sorted([f.split('/')[-1].replace('_FLAIR.npy', '') for f in glob('{}/*FLAIR*'.format(pp_base))])
t2_cases = sorted([f.split('/')[-1].replace('_T2.npy', '') for f in glob('{}/*T2*'.format(pp_base))])

mis_cases = sorted([f for f in fl_cases if f not in t2_cases])

for cnum in mis_cases:
    case_dir = os.path.join(base_path, cnum)
    
    has_mprage = has_sequence(case_dir, 'mprage')
    has_bravo = has_sequence(case_dir, 'bravo')
    has_t1 = has_mprage or has_bravo
    
    has_t2 = has_sequence(case_dir, 't2')
    has_fl = has_sequence(case_dir, 'flair')
    
    has_rfm = has_sequence(case_dir, 'reformat')
    
    if not has_t2:
        print(cnum)
    
#     if has_bravo:
#         print(cnum)
    
#     if has_t1 and has_t2 and has_fl:
#         continue
#     else:
#         print(cnum)

In [None]:
pp_path = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data_fp64'
pp_dest = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data'
cases = sorted([f.split('/')[-1].replace('.npy', '') for f in glob('{}/*.npy'.format(pp_path))])

for cnum in tqdm(cases, total=len(cases)):
    data = np.load('{}/{}.npy'.format(pp_path, cnum)).astype(np.float16)
    if data.shape[2] == 4:
        data_t2 = data[:, :, -1]
    np.save('{}/{}_T2.npy'.format(pp_dest, cnum), data_t2)

In [None]:
fl_data = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0085_FLAIR.npy').astype(np.float32)
t1_data = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0085.npy').astype(np.float32)

t1_pre = t1_data[0, :, 0]
t1_pre = t1_pre / t1_pre.mean()

t1_low = t1_data[0, :, 1] 
t1_low = t1_low / t1_low.mean()

t1_post = t1_data[0, :, 2]
t1_post = t1_post / t1_post.mean()

In [None]:
sl = 184
plt.imshow(np.hstack([t1_pre[sl], t1_low[sl], t1_post[sl], fl_data[0, sl]]))

In [None]:
all_cases = su_exp.get_experiment_data('stanford_sri', 
                                       dirpath_exp='/home/srivathsa/projects/SubtleGad/train/configs/experiments')
pp_base = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data'
fl_cases = sorted([d.split('/')[-1].replace('_FLAIR.npy', '') for d in glob('{}/*FLAIR*'.format(pp_base))])
mis_cases = [c for c in all_cases if c not in fl_cases]
print(mis_cases)

In [None]:
dbase = '/home/srivathsa/projects/studies/gad/stanford/data'
for cnum in mis_cases:
    if os.path.exists('{}/{}.npy'.format(pp_base, cnum)):
        print(cnum)

In [None]:
data = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0186.npy')

In [None]:
plt.imshow(data[0, 180, 0])
plt.colorbar()

In [2]:
all_cases = su_exp.get_experiment_data('stanford_sri', 
                                       dirpath_exp='/home/srivathsa/projects/SubtleGad/train/configs/experiments')

pp_base = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data'
sc_factor = 1.25
for cnum in tqdm(all_cases, total=len(all_cases)):
    data = np.load('{}/{}.npy'.format(pp_base, cnum))
    data_rs = data.copy()
    new_max = data_rs[0, :, 1].max() * sc_factor
    data_rs[0, :, 2] = np.interp(
        data_rs[0, :, 2], (data_rs[0, :, 2].min(), data_rs[0, :, 2].max()),
        (data_rs[0, :, 1].min(), new_max)
    )
    
    np.save('{}/{}_rs.npy'.format(pp_base, cnum), data_rs)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
d1 = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0323.npy')
d2 = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0323_T2.npy')

d3 = np.array([d1[0, :, 0], d1[0, :, 1], d1[0, :, 2], d2[0]])
d3_m = np.array([d1[1, :, 0], d1[1, :, 1], d1[1, :, 2], d2[1]])

d3_new = np.array([d3, d3_m]).transpose(0, 2, 1, 3, 4)
print(d3_new.shape)

(2, 336, 4, 512, 512)


In [11]:
np.save('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0323.npy', d3_new)

In [11]:
for fpath in glob('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/*rs.npy'):
    os.rename(fpath, fpath.replace('_rs', ''))

In [None]:
data = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0085_full.npy').astype(np.float32)

In [None]:
fl_cases = sorted([f.split('/')[-1].replace('.npy', '') 
                   for f in glob('/home/srivathsa/projects/studies/gad/stanford/preprocess/uad_fl/*.npy')])
t2_cases = sorted([f.split('/')[-1].replace('.npy', '') 
                   for f in glob('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/*.npy')])

for cnum in t2_cases:
    if cnum not in fl_cases:
        print(cnum)

## Downsample to 256 x 256

In [2]:
fpath_t1 = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data'
fpath_fl = '/home/srivathsa/projects/studies/gad/stanford/preprocess/data_fl'

cases = sorted([f.split('/')[-1].replace('.npy', '') for f in glob('{}/*.npy'.format(fpath_t1))])

for cnum in tqdm(cases, total=len(cases)):
    t1pre, t1low, t1post, t2 = suio.load_file(
        '{}/{}.npy'.format(fpath_t1, cnum), params={'h5_key': 'data_mask'}
    ).transpose(1, 0, 2, 3)
        
    plt.imshow(t2[180])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/459 [00:00<?, ?it/s]

In [4]:
plt.imshow(t2[180])

<matplotlib.image.AxesImage at 0x7fa0f11205f8>

In [4]:
data = np.load('/home/srivathsa/projects/studies/gad/stanford/preprocess/data/Patient_0118.npy')[0]

In [7]:
plt.imshow(data[180, 0])

<matplotlib.image.AxesImage at 0x7f08df617860>

## Choose cases - train/val/test split

In [29]:
exc_cases = [
    "Patient_0138",
    "Patient_0162",
    "Patient_0170",
    "Patient_0183",
    "Patient_0235",
    "Patient_0253",
    "Patient_0285",
    "Patient_0378",
    "Patient_0399"
]

all_cases = su_exp.get_experiment_data('stanford_sri', 
                                       dirpath_exp='/home/srivathsa/projects/SubtleGad/train/configs/experiments')

avail_cases = sorted([c for c in all_cases if c not in exc_cases])

train_cases = np.random.choice(avail_cases, size=375, replace=False)
val_cases = [c for c in avail_cases if c not in train_cases] + exc_cases
val_cases = sorted(val_cases)

In [30]:
print(len(train_cases))
print(len(val_cases))

375
67


In [34]:
list(sorted(val_cases))

['Patient_0093',
 'Patient_0135',
 'Patient_0138',
 'Patient_0139',
 'Patient_0153',
 'Patient_0154',
 'Patient_0160',
 'Patient_0162',
 'Patient_0170',
 'Patient_0183',
 'Patient_0189',
 'Patient_0198',
 'Patient_0205',
 'Patient_0218',
 'Patient_0235',
 'Patient_0253',
 'Patient_0274',
 'Patient_0278',
 'Patient_0285',
 'Patient_0292',
 'Patient_0293',
 'Patient_0294',
 'Patient_0297',
 'Patient_0301',
 'Patient_0308',
 'Patient_0316',
 'Patient_0317',
 'Patient_0327',
 'Patient_0328',
 'Patient_0331',
 'Patient_0342',
 'Patient_0350',
 'Patient_0354',
 'Patient_0355',
 'Patient_0356',
 'Patient_0361',
 'Patient_0378',
 'Patient_0379',
 'Patient_0393',
 'Patient_0394',
 'Patient_0399',
 'Patient_0401',
 'Patient_0404',
 'Patient_0406',
 'Patient_0416',
 'Patient_0418',
 'Patient_0419',
 'Patient_0427',
 'Patient_0430',
 'Patient_0437',
 'Patient_0444',
 'Patient_0450',
 'Patient_0474',
 'Patient_0482',
 'Patient_0486',
 'Patient_0493',
 'Patient_0495',
 'Patient_0498',
 'Patient_0502

In [7]:
all_cases = su_exp.get_experiment_data('stanford_mcon', dataset='train', 
                                       dirpath_exp='/home/srivathsa/projects/SubtleGad/train/configs/experiments')


In [8]:
print(len(all_cases))

374


In [10]:
sub_cases = np.random.choice(all_cases, size=150, replace=False)

In [12]:
sorted(sub_cases)

['Patient_0085',
 'Patient_0086',
 'Patient_0088',
 'Patient_0092',
 'Patient_0102',
 'Patient_0106',
 'Patient_0108',
 'Patient_0109',
 'Patient_0113',
 'Patient_0115',
 'Patient_0116',
 'Patient_0120',
 'Patient_0121',
 'Patient_0123',
 'Patient_0124',
 'Patient_0127',
 'Patient_0128',
 'Patient_0134',
 'Patient_0137',
 'Patient_0142',
 'Patient_0143',
 'Patient_0144',
 'Patient_0147',
 'Patient_0150',
 'Patient_0151',
 'Patient_0157',
 'Patient_0159',
 'Patient_0161',
 'Patient_0168',
 'Patient_0169',
 'Patient_0171',
 'Patient_0173',
 'Patient_0175',
 'Patient_0176',
 'Patient_0177',
 'Patient_0181',
 'Patient_0186',
 'Patient_0195',
 'Patient_0201',
 'Patient_0202',
 'Patient_0206',
 'Patient_0208',
 'Patient_0211',
 'Patient_0212',
 'Patient_0214',
 'Patient_0215',
 'Patient_0217',
 'Patient_0222',
 'Patient_0223',
 'Patient_0226',
 'Patient_0228',
 'Patient_0231',
 'Patient_0232',
 'Patient_0246',
 'Patient_0254',
 'Patient_0256',
 'Patient_0257',
 'Patient_0259',
 'Patient_0267

In [18]:
exc_cases = [
    "Patient_0138",
    "Patient_0162",
    "Patient_0170",
    "Patient_0183",
    "Patient_0235",
    "Patient_0253",
    "Patient_0285",
    "Patient_0378",
    "Patient_0399"
]

val_cases = su_exp.get_experiment_data('stanford_mcon', dataset='val', 
                                       dirpath_exp='/home/srivathsa/projects/SubtleGad/train/configs/experiments')
val_cases = [c for c in val_cases if c not in exc_cases]
print(len(val_cases))

58


In [20]:
sub_val_cases = np.random.choice(val_cases, size=26, replace=False)

In [23]:
sub_val_cases = exc_cases + list(sub_val_cases)

In [24]:
sorted(sub_val_cases)

['Patient_0138',
 'Patient_0154',
 'Patient_0160',
 'Patient_0162',
 'Patient_0170',
 'Patient_0183',
 'Patient_0189',
 'Patient_0198',
 'Patient_0205',
 'Patient_0235',
 'Patient_0253',
 'Patient_0278',
 'Patient_0285',
 'Patient_0292',
 'Patient_0293',
 'Patient_0316',
 'Patient_0317',
 'Patient_0328',
 'Patient_0342',
 'Patient_0350',
 'Patient_0361',
 'Patient_0378',
 'Patient_0379',
 'Patient_0394',
 'Patient_0399',
 'Patient_0427',
 'Patient_0437',
 'Patient_0486',
 'Patient_0495',
 'Patient_0515',
 'Patient_0529',
 'Patient_0530',
 'Patient_0566',
 'Patient_0567',
 'Patient_0580']

In [25]:
print(len(sub_val_cases))

35
