In [11]:
import os
import glob

# h5py can read hdf5 dataset
import h5py

# delete bad data files
from send2trash import send2trash

# fastmri has some k-space undersampling functions we can use
# git clone https://github.com/facebookresearch/fastMRI.git
# go to the fastmri directory
# pip install -e .
import fastmri

# We will use these functions to generate masks
from fastmri.data.subsample import RandomMaskFunc, EquispacedMaskFunc

# sigpy is apparently a good MRI viewing tool
# pip install sigpy
import sigpy as sp
import sigpy.plot as pl

import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib

%matplotlib notebook

In [12]:
# define constants
# Name of the dataset directory to load; must be one of the keys of AXES.
DATASET = 'singlecoil_train'
# Axes passed to sp.ifft for each supported dataset (see _get_mri_im).
AXES = {
        'singlecoil_train' : (1, 2),
        'multicoil_train' : (2, 3),
       }
# The dataset directory is looked up next to the current working directory
# (i.e. inside the parent of os.getcwd()).
PATH = os.path.join(os.path.dirname(os.getcwd()), DATASET)
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes the file order (and therefore the seeded shuffle applied later)
# reproducible across runs and machines.
mri_paths = sorted(glob.glob(os.path.join(PATH, '*.h5')))

In [13]:
def _get_kspace_and_reconstruction_rss(filename):
    """
    Read the 'kspace' and 'reconstruction_rss' datasets of one .h5 file.

    @params filename: full path to .h5 mri file
    @return tuple (kspace, reconstruction_rss) with the full contents of the
        two datasets, or None if the file could not be read (a message is
        printed in that case, so callers that unpack the result will fail
        with a TypeError)
    """
    try:
        with h5py.File(filename, 'r') as hr:
            # [:] reads each dataset fully before the file is closed
            return hr['kspace'][:], hr['reconstruction_rss'][:]
    except Exception:
        # Previously a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; those now propagate so a run can be interrupted.
        print(f'Error could not open {filename}')
        return None

def _get_kspace_undersampled(kspace, center_fractions = [0.04], accelerations = [4]):
    """
    Apply a RandomMaskFunc sampling mask to a k-space array.

    @params kspace: from _get_kspace_and_reconstruction_rss(filename)
    @params center_fractions: forwarded to RandomMaskFunc; for undersampling,
        N*center_fraction columns in center corresponding to low-frequencies
    @params accelerations: forwarded to RandomMaskFunc; how much mri
        acquisition is sped up
    @return undersampled k-space (kspace multiplied element-wise by the mask)
    """
    # No seed is passed to the mask function here.
    sampler = RandomMaskFunc(
        center_fractions = center_fractions,
        accelerations = accelerations,
    )
    # The mask is generated for the shape of the incoming k-space and
    # converted to a numpy array so it can be multiplied with it.
    sampling_mask = np.array(sampler(kspace.shape))
    undersampled = kspace * sampling_mask
    return undersampled

def _get_mri_im(
    reconstruction_rss,
    kspace_undersampled,
    DATASET
):
    """
    Build one (input, label) image pair with a common shape.

    @params reconstruction_rss: reconstructed MR image of fully sampled kspace, provided
    @params kspace_undersampled: mask-undersampled k-space from _get_kspace_undersampled
    @params DATASET: key into AXES, i.e. 'singlecoil_train' or 'multicoil_train'
    @return (undersampled mri image, fully sampled mri image (i.e. label for GAN)),
        both resized with sp.resize to [1, 30, 320, 320]
    """
    def _to_common_shape(image):
        # resize to make sure images are all the same size
        return sp.resize(image, [1, 30, 320, 320])

    # Inverse FFT of the undersampled k-space over the axes configured for
    # this dataset. The fully sampled k-space is not transformed here; the
    # provided reconstruction is used as the fully sampled image instead.
    undersampled_im = sp.ifft(kspace_undersampled, axes=AXES[DATASET])

    return (
        _to_common_shape(undersampled_im),
        _to_common_shape(reconstruction_rss),
    )
    

def get_datum_from_single_file(filename, DATASET):
    """
    user-facing function for tf Dataset object

    Reads one file, undersamples its k-space with the default mask settings
    and converts the result into an image pair.

    @params filename: full path to .h5 mri file
    @params DATASET: key into AXES, i.e. 'singlecoil_train' or 'multicoil_train'
    @return (undersampled mri image, fully sampled mri image (i.e. label for GAN))
    """
    # Unpacking fails with a TypeError when the reader returned None.
    full_kspace, rss_image = _get_kspace_and_reconstruction_rss(filename)
    masked_kspace = _get_kspace_undersampled(full_kspace)
    image_pair = _get_mri_im(rss_image, masked_kspace, DATASET)
    return image_pair


def get_data_from_files(filenames, DATASET):
    """
    user-facing function for tf Dataset object

    Loads every file with get_datum_from_single_file and stacks the results
    along the first axis. A file whose loading raises an Exception is
    reported, sent to the trash with send2trash, and skipped.

    @params filenames: list of full paths to .h5 mri files
    @params DATASET: i.e. 'singlecoil_train' or 'multicoil_train'
    @return tuple (undersampled images, fully sampled images (i.e. labels for
        GAN)); two ndarrays of shape (number of loaded files, 30, 320, 320)
    """
    # Zero-length float64 seeds: they keep the result shape (0, 30, 320, 320)
    # when no file could be loaded and give the same dtype promotion as
    # stacking onto a float64 array.
    undersampled_images = [np.empty((0, 30, 320, 320))]
    fullysampled_images = [np.empty((0, 30, 320, 320))]
    for mri_path in filenames:
        try:
            undersampled_crop, fullysampled_crop = get_datum_from_single_file(
                mri_path, DATASET
            )
        except Exception:
            # Only ordinary errors mark a file as bad. A bare `except:` would
            # also catch KeyboardInterrupt/SystemExit and trash a good file
            # when the run is interrupted.
            print(f'could not open file {mri_path}')
            send2trash(mri_path)
            print(f'sent file {mri_path} to trash')
            continue
        # Both halves of a pair are appended together so the two outputs can
        # never get out of step with each other.
        undersampled_images.append(undersampled_crop)
        fullysampled_images.append(fullysampled_crop)

    # Concatenate once at the end instead of re-copying everything loaded so
    # far on every iteration.
    return (
        np.concatenate(undersampled_images, axis=0),
        np.concatenate(fullysampled_images, axis=0),
    )

#


In [14]:
# Load every file into memory as (undersampled, fully sampled) image stacks.
undersampled, fully_sampled = get_data_from_files(mri_paths, DATASET)

# Pair the two stacks slice-wise along the first axis and shuffle them with a
# fixed seed; the order is reshuffled on every pass over the dataset.
ds = (
    tf.data.Dataset
    .from_tensor_slices((undersampled, fully_sampled))
    .shuffle(150, seed=123, reshuffle_each_iteration=True)
)
ds.take(1)

<TakeDataset shapes: ((30, 320, 320), (30, 320, 320)), types: (tf.float64, tf.float64)>

In [15]:
# Show one (undersampled, fully sampled) pair from the shuffled dataset,
# undersampled image first.
for undersampled_im, fullysampled_im in ds.take(1):
    for image in (undersampled_im, fullysampled_im):
        pl.ImagePlot(image)

This is a really nice comparison of what the partially sampled k-space (top) and the fully sampled k-space (bottom) will do to the reconstruction of the MR image. 


Normally for MRI, there is not much preprocessing beyond what has already been done. I could not find a gold standard with which to augment the dataset, but I may consult my SURF mentor in the coming days for more information. This is likely due to the fact that MRIs are already so rich with information, and improper augmentation may cause the GAN to hallucinate. In this particular project, we potentially have access to thousands of MRIs (the limitation is the disk quota, not the dataset itself), so I will go to OH this week to discuss how we may deal with this issue. While it may not be practical to store thousands of MRIs on the Caltech Clusters, I would imagine that it would be theoretically preferable to leverage the large dataset, rather than augment the current dataset.


Some more statistics: currently, we have $100$ MRIs in the dataset. I think it is interesting to note that there are other datasets available from NYU fastMRI as well. Mainly, they also have multicoil knee, DICOM knee, multicoil brain, and DICOM brain datasets, each of which has hundreds of MR images as well. Based on how the disk space works out, it might be interesting to see if we can train the GAN to simulate super-resolution and deblurring for multiple types of MRIs.

### Bibliography
Will be in a better format once we're off Jupyter Notebook

Bustin, A., Fuin, N., Botnar, R. M., & Prieto, C. (2020). From Compressed-Sensing to Artificial Intelligence-Based Cardiac MRI Reconstruction. Frontiers in Cardiovascular Medicine, 7. https://doi.org/10.3389/fcvm.2020.00017


F. Knoll et al., "fastMRI: A publicly available raw K-space and DICOM dataset of knee images for accelerated MR image reconstruction using machine learning", Radiol.: Artif. Intell., vol. 2, no. 1, 2020.

Ye, J.C. Compressed sensing MRI: a review from signal processing perspective. BMC biomed eng 1, 8 (2019). https://doi.org/10.1186/s42490-019-0006-z

Zbontar J, Knoll F, Sriram A, et al. fastMRI: An Open Dataset and Benchmarks for Accelerated MRI. arXiv [cs.CV]. http://arxiv.org/abs/1811.08839. Published 2018. 