In [1]:
# Cell #1
# This first cell (Cell #1) loads a few data sets of interest: T1, T2, and GAD series from several subjects with brain tumors.
# These are DICOM images, so we first have to convert them to NIfTI.
# The NIfTI converter does not let us choose the output file name, so we also
# have to move the results and rename them. We will use Python for that.

!rm -rf MC-ImageWrangling
!git clone https://github.com/slowvak/MC-ImageWrangling.git
  
!unzip -q MC-ImageWrangling/Bet-n-dcm2nii.zip
!rm -rf images

!mkdir images
!cd images; unzip -q "../MC-ImageWrangling/S1-4.zip" 
!cd images; unzip -q "../MC-ImageWrangling/S5-8.zip" 
!cd images; unzip -q "../MC-ImageWrangling/S9-12.zip" 
!cd images; unzip -q "../MC-ImageWrangling/S13-16.zip" 

!rm -rf nii
!rm -rf tmp

!mkdir tmp
!mkdir nii

NIFTI_PATH = './nii'
# now some python code to loop over all the folders and convert from DICOM to nifti
# and then since the command doesn't allow speciyfing the name, will also move
#them from temp directory to an organized directory.
# this is why seemingly simple tasks always take longer than planned...

import os, fnmatch

for subj in fnmatch.filter(os.listdir('./images'), 'S*'):
    subj_path = os.path.join('./images', subj)
    series = os.listdir(subj_path)
    series = ['T1', 'T2', 'GAD']
    for ser in series:
        dcm_path = os.path.join (subj_path, ser)
        cmd = "./dcm2nii -o tmp %s" % (dcm_path)
        os.system(cmd)
#        print (cmd)
        # now have to find tehe .nii.gz and move it out
        f = fnmatch.filter(os.listdir('./tmp'), '*.nii.gz')
        out_dir = os.path.join (NIFTI_PATH, subj)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_path = os.path.join (out_dir, ser)
        cmd = "mv ./tmp/%s %s.nii.gz" % (f[0], out_path)
        os.system(cmd)
#        print (cmd)


!ls -l nii/S1


Cloning into 'MC-ImageWrangling'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 18 (delta 4), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (18/18), done.
total 5888
-rw-r--r-- 1 root root 2023457 Jul  4 15:19 GAD.nii.gz
-rw-r--r-- 1 root root 1951148 Jul  4 15:19 T1.nii.gz
-rw-r--r-- 1 root root 2044295 Jul  4 15:19 T2.nii.gz


In [24]:
# List the files currently present in subject S2's NIfTI folder.
!ls -l ./nii/S2

total 28844
-rw-r--r-- 1 root root 5146176 Jul  4 15:51 GAD.nii.gz
-rw-r--r-- 1 root root 5146176 Jul  4 15:51 N4-GAD.nii.gz
-rw-r--r-- 1 root root 5182360 Jul  4 15:51 N4-T1.nii.gz
-rw-r--r-- 1 root root 5346775 Jul  4 15:51 N4-T2.nii.gz
-rw-r--r-- 1 root root 5279815 Jul  4 15:58 Reg-T2.nii.gz
-rw-r--r-- 1 root root 1626931 Jul  4 15:19 T1.nii.gz
-rw-r--r-- 1 root root 1789775 Jul  4 15:19 T2.nii.gz


In [25]:
# CELL #2
# Now do some basic processing to prepare the images
#!ls ./nii
#!ls ./nii/S1

# FSL (flirt-the image registration application) complains if this environment variable is not set
os.environ['FSLOUTPUTTYPE'] = 'NIFTI_GZ'

#for subj in ['S1','S2']:
for subj in os.listdir(NIFTI_PATH):
    subj_path = os.path.join(NIFTI_PATH, subj)
    print ("Working on " + subj_path)
    series = fnmatch.filter(os.listdir(subj_path), '*.nii.gz')
    for ser in ['T1', 'T2', 'GAD']:
        nii_file = os.path.join (subj_path, ser)
# First, perform N4 bias correction. Not required, but may improve results. Also must track new names
        new_file = os.path.join (subj_path, 'N4-' + ser)
        cmd = "./N4BiasFieldCorrection -i %s.nii.gz -o %s.nii.gz" % (nii_file, new_file)
        os.system(cmd)
#        print (cmd)
# Next, Register the images to the post-Gad images so skip the GAD
    GAD_file = os.path.join (subj_path, 'N4-GAD.nii.gz')
    T1_file =  os.path.join (subj_path, 'N4-T1.nii.gz')
    new_file = os.path.join (subj_path, 'Reg-T1.nii.gz')
    cmd = "./flirt -in %s -ref %s  -out %s" % (T1_file, GAD_file, new_file)
    os.system(cmd)
#    print (cmd)
    T2_file =  os.path.join (subj_path, 'N4-T2.nii.gz')
    new_file = os.path.join (subj_path, 'Reg-T2.nii.gz')
    cmd = "./flirt -in %s -ref %s  -out %s" % (T2_file, GAD_file, new_file)
    os.system(cmd)
#    print (cmd)
# can also try to mask out non-brain tissue, but that doesn't work as well on thick slice MRI

# finally, copy over original so you can skip these steps if you like
    cmd = "cp %s.nii.gz %s.nii.gz" % (os.path.join (subj_path, 'Reg-T1'), os.path.join (subj_path, 'T1'))
    os.system(cmd)
    cmd = "cp %s.nii.gz %s.nii.gz" % (os.path.join (subj_path, 'Reg-T2'), os.path.join (subj_path, 'T2'))
    os.system(cmd)
    cmd = "cp %s.nii.gz %s.nii.gz" % (os.path.join (subj_path, 'N4-GAD'), os.path.join (subj_path, 'GAD'))
    os.system(cmd) 

#!ls ./nii
!ls -l ./nii/S1        
!ls -l ./nii/S2

./N4BiasFieldCorrection -i ./nii/S12/T1.nii.gz -o ./nii/S12/N4-T1.nii.gz
./N4BiasFieldCorrection -i ./nii/S12/T2.nii.gz -o ./nii/S12/N4-T2.nii.gz
./N4BiasFieldCorrection -i ./nii/S12/GAD.nii.gz -o ./nii/S12/N4-GAD.nii.gz
./flirt -in ./nii/S12/N4-T1.nii.gz -ref ./nii/S12/N4-GAD.nii.gz  -out ./nii/S12/Reg-T1.nii.gz
./flirt -in ./nii/S12/N4-T2.nii.gz -ref ./nii/S12/N4-GAD.nii.gz  -out ./nii/S12/Reg-T2.nii.gz
./N4BiasFieldCorrection -i ./nii/S9/T1.nii.gz -o ./nii/S9/N4-T1.nii.gz
./N4BiasFieldCorrection -i ./nii/S9/T2.nii.gz -o ./nii/S9/N4-T2.nii.gz
./N4BiasFieldCorrection -i ./nii/S9/GAD.nii.gz -o ./nii/S9/N4-GAD.nii.gz
./flirt -in ./nii/S9/N4-T1.nii.gz -ref ./nii/S9/N4-GAD.nii.gz  -out ./nii/S9/Reg-T1.nii.gz
./flirt -in ./nii/S9/N4-T2.nii.gz -ref ./nii/S9/N4-GAD.nii.gz  -out ./nii/S9/Reg-T2.nii.gz
./N4BiasFieldCorrection -i ./nii/S2/T1.nii.gz -o ./nii/S2/N4-T1.nii.gz
./N4BiasFieldCorrection -i ./nii/S2/T2.nii.gz -o ./nii/S2/N4-T2.nii.gz
./N4BiasFieldCorrection -i ./nii/S2/GAD.nii.gz -o ./

In [29]:
#!zip T1.zip ./nii/*/T1.nii.gz 
#!zip T2.zip ./nii/*/T2.nii.gz
#!zip GAD.zip ./nii/*/GAD.nii.gz

# uncomment these lines if you skipped the cell above
#!unzip T1.zip
#!unzip T2.zip
#!unzip GAD.zip

# must also delete the DICOM and intermediate files to have enough space to make the new files
!rm -rf nii/Reg*
!rm -rf nii/N4-*
!rm -rf ./images
!rm T1.zip
!rm T2.zip
!rm GAD.zip
# so at this point, we have nifti files (T1.nii.gz, T2.nii.gz, GAD.nii.gz) for each subject


updating: nii/S10/T1.nii.gz (deflated 0%)
updating: nii/S11/T1.nii.gz (deflated 0%)
updating: nii/S12/T1.nii.gz (deflated 0%)
updating: nii/S13/T1.nii.gz (deflated 0%)
updating: nii/S14/T1.nii.gz (deflated 0%)
updating: nii/S15/T1.nii.gz (deflated 0%)
updating: nii/S16/T1.nii.gz (deflated 0%)
updating: nii/S1/T1.nii.gz (deflated 0%)
updating: nii/S2/T1.nii.gz (deflated 0%)
updating: nii/S3/T1.nii.gz (deflated 0%)
updating: nii/S4/T1.nii.gz (deflated 0%)
updating: nii/S5/T1.nii.gz (deflated 0%)
updating: nii/S6/T1.nii.gz (deflated 0%)
updating: nii/S7/T1.nii.gz (deflated 0%)
updating: nii/S8/T1.nii.gz (deflated 0%)
updating: nii/S9/T1.nii.gz (deflated 0%)
  adding: nii/S10/T2.nii.gz (deflated 0%)
  adding: nii/S11/T2.nii.gz (deflated 0%)
  adding: nii/S12/T2.nii.gz (deflated 0%)
  adding: nii/S13/T2.nii.gz (deflated 0%)
  adding: nii/S14/T2.nii.gz (deflated 0%)
  adding: nii/S15/T2.nii.gz (deflated 0%)
  adding: nii/S16/T2.nii.gz (deflated 0%)
  adding: nii/S1/T2.nii.gz (deflated 0%)
  

In [0]:
# 'TumorSlices.csv' is a text file I created that lists, for each subject ID,
# the first and the last slice showing contrast enhancement.
# Note that subjects 2 and 14 don't have much enhancement.
# Pandas, a very popular data-analysis library, has a built-in reader for CSV
# (and Excel) files, so we use it to load the table into a DataFrame.
import pandas as pd

tumor_slices_csv = './MC-ImageWrangling/TumorSlices.csv'
df = pd.read_csv(tumor_slices_csv)


In [28]:
# List the contents of the current working directory.
!ls -l

total 125884
-rwxr-xr-x  1 root root    13727 Jul  2 21:40 bet
-rwxr-xr-x  1 root root 15829495 Jul  2 21:40 bet2
-rwxr-xr-x  1 root root   947240 Jul  2 21:41 dcm2nii
-rwxr-xr-x  1 root root 16734490 Jul  2 21:39 flirt
drwxr-xr-x 19 root root     4096 Jul  4 15:19 images
drwxr-xr-x  3 root root     4096 Jul  4 15:19 MC-ImageWrangling
-rwxr-xr-x  1 root root 15000890 Jul  3 13:41 N4BiasFieldCorrection
drwxr-xr-x 18 root root     4096 Jul  4 15:19 nii
drwxr-xr-x  1 root root     4096 Jun 18 16:14 sample_data
-rw-r--r--  1 root root 80343057 Jul  4 16:24 T1.zip
drwxr-xr-x  2 root root     4096 Jul  4 15:19 tmp


In [0]:
!rm -rf nii
!rm -rf tmp

!mkdir tmp
!mkdir nii

# now some python code to loop over all the folders and convert from DICOM to nifti
# and then since the command doesn't allow speciyfing the name, will also move
#them from temp directory to an organized directory.
# this is why seemingly simple tasks always take longer than planned...

import os, fnmatch

for subj in fnmatch.filter(os.listdir('./images'), 'S*'):
    subj_path = os.path.join('./images', subj)
    series = os.listdir(subj_path)
    series = ['T1', 'T2', 'GAD']
    for ser in series:
        dcm_path = os.path.join (subj_path, ser)
        cmd = "./dcm2nii -o tmp %s" % (dcm_path)
        os.system(cmd)
        print (cmd)
        # now have to find teh .nii.gz and move it out
        f = fnmatch.filter(os.listdir('./tmp'), '*.nii.gz')
        out_dir = os.path.join ('./nii', subj)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_path = os.path.join (out_dir, ser)
        cmd = "mv ./tmp/%s %s.nii.gz" % (f[0], out_path)
        os.system(cmd)
        print (cmd)

        
#!./dcm2nii -o nii/S1 images/S1/T1/*
#!ls -l images/S1/T1
!ls -l nii/S1


./dcm2nii -o tmp ./images/S12/T1
mv ./tmp/19990516_093835s006a1000.nii.gz ./nii/S12/T1.nii.gz
./dcm2nii -o tmp ./images/S12/T2
mv ./tmp/19990516_093835s004a1000.nii.gz ./nii/S12/T2.nii.gz
./dcm2nii -o tmp ./images/S12/GAD
mv ./tmp/19990516_093835s009a1000.nii.gz ./nii/S12/GAD.nii.gz
./dcm2nii -o tmp ./images/S9/T1
mv ./tmp/19981128_131459Brains005a1001.nii.gz ./nii/S9/T1.nii.gz
./dcm2nii -o tmp ./images/S9/T2
mv ./tmp/19981128_131459Brains003a1001.nii.gz ./nii/S9/T2.nii.gz
./dcm2nii -o tmp ./images/S9/GAD
mv ./tmp/19981128_131459Brains006a1001.nii.gz ./nii/S9/GAD.nii.gz
./dcm2nii -o tmp ./images/S2/T1
mv ./tmp/19960823_093955s005a1000.nii.gz ./nii/S2/T1.nii.gz
./dcm2nii -o tmp ./images/S2/T2
mv ./tmp/19960823_093955s003a1000.nii.gz ./nii/S2/T2.nii.gz
./dcm2nii -o tmp ./images/S2/GAD
mv ./tmp/19960823_093955s006a1000.nii.gz ./nii/S2/GAD.nii.gz
./dcm2nii -o tmp ./images/S6/T1
mv ./tmp/19970526_160427s005a1000.nii.gz ./nii/S6/T1.nii.gz
./dcm2nii -o tmp ./images/S6/T2
mv ./tmp/19970526_160

In [0]:

!ls -l nii/S1
!ls -l nii/S2
!ls -l nii/S3
!ls -l nii/S4
!ls -l nii/S5
!ls -l nii/S7
!ls -l nii/S7
!ls -l nii/S11
!ls -l nii/S13
!ls -l nii/S14
!ls -l nii/S15
!ls -l nii/S16


total 5888
-rw-r--r-- 1 root root 2023457 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1951148 Jul  2 22:49 T1.nii.gz
-rw-r--r-- 1 root root 2044295 Jul  2 22:49 T2.nii.gz
total 5040
-rw-r--r-- 1 root root 1736988 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1626931 Jul  2 22:49 T1.nii.gz
-rw-r--r-- 1 root root 1789775 Jul  2 22:49 T2.nii.gz
total 4860
-rw-r--r-- 1 root root 1679711 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1552734 Jul  2 22:49 T1.nii.gz
-rw-r--r-- 1 root root 1736468 Jul  2 22:49 T2.nii.gz
total 4724
-rw-r--r-- 1 root root 1634570 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1516082 Jul  2 22:49 T1.nii.gz
-rw-r--r-- 1 root root 1678169 Jul  2 22:49 T2.nii.gz
total 5276
-rw-r--r-- 1 root root 1837186 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1714852 Jul  2 22:49 T1.nii.gz
-rw-r--r-- 1 root root 1846581 Jul  2 22:49 T2.nii.gz
total 5896
-rw-r--r-- 1 root root 2073840 Jul  2 22:49 GAD.nii.gz
-rw-r--r-- 1 root root 1932849 Jul  2 22:49 T1.nii.gz
-rw-r--r--

In [0]:
# Cell #2
# create new versions of the T1Pre, T1Post, and T2 images that range from 0 to 255, and where the 0 intensity value maps to the 5th percentile value 
# and 255 maps to the 95th percentile value.

In [0]:
# Cell #3
# take the 8 bit T1Post and store that into the red channel, the T1Pre into the green channel and the T2 into the blue channel


In [0]:
# Cell #4
# display some images to see the result

In [0]:
# Cell #5
# resize the CT images down to 256 pixels in the X and Y dimension using Pillow


In [0]:
# Cell # 6
# useful functions in numpy and pillow, including filter functions, as well as more advanced functions like morphologic filtering.
# We won't use these in our example, but you might want to try.


In [0]:
# Cell #7
# apply the image wrangling to find MRI slices that have tumors and those that don't
# We have already applied this to several BRI brain exams, and then also separated the slices with tumors and those without tumors
# If you have your own set of tumors (or other disease) you should try encoding the MRIs, then separating the classes, and then training the classifier
# first we need to install and then load the fastai libraries

!pip3 install fastai
from fastai.vision import *