# Preprocess MRI
## Packages

In [1]:
import pydicom as dicom
import numpy as np
import matplotlib.pyplot as plt
import os
import nibabel as nib
from ipywidgets import interact
from matplotlib.colors import ListedColormap
from azureml.core import Workspace, Dataset, Datastore
import shutil
import os
import pandas as pd

### Helper Functions

In [2]:
slice_description_to_use = ['t1_tse_cor', 
                            't1_tse_fs_cor', 
                            't1_tse_sag',
                            "t2_tse_cor", 
                            "t2_tse_sag",
                            't2_tse_fs_cor',
                            't2_tse_fs_sag',
                            
                            't1_tse_cor_T', 
                            't1_tse_fs_cor_T', 
                            't1_tse_sag_T', 
                            't2_tse_fs_cor_T',
                            't2_tse_fs_sag_T',
                            "t2_tse_fs_cor_II"]
def get_mri_slices_from_file(path,slice_descriptions = slice_description_to_use):
    path = f"../raw_data/downloaded_mri_files/{path}/"
    path_list = os.listdir(path)
    slices = []
    for dcm_path in path_list:
        try:
            slice = dicom.dcmread(path + "/" + dcm_path) 
            slices.append(slice)
            # if slice.SeriesDescription in slice_descriptions:
            #     slices.append(slice)
            # elif "t2 tse fs cor" == slice.SeriesDescription:
            #     slice.SeriesDescription = "t2_tse_fs_cor"
            #     slices.append(slice)
            # elif "t1 tse cor" == slice.SeriesDescription:
            #     slice.SeriesDescription = "t1_tse_cor"
            #     slices.append(slice)
            # else: 
            #     pass
        except:
            pass
    return slices

def get_slice_descriptions(slices):
    names = [ds.SeriesDescription for ds in slices]
    return np.unique(names,return_counts=True)

def slices_sort_and_selectet(slices,slice_type):
    slices.sort(key=lambda x: int(x.SliceLocation))
    type_ = [slic.SeriesDescription == slice_type for slic in slices]
    slice_one_type = np.array(slices)[type_]
    return slice_one_type


def delete_full_mri_storage(file_number):
    folder_path = f"../raw_data/downloaded_mri_files/{file_number}/"
    shutil.rmtree(folder_path)
def save_files(slices,file,slice_description_to_use=slice_description_to_use):
    for description in slice_description_to_use:
        slices_selected = slices_sort_and_selectet(slices,description)
        if len(slices_selected) > 0:
            array_list = np.array([dcm.pixel_array for dcm in slices_selected])
            volume_3d = np.stack(array_list, axis=-1)
            affine = np.eye(4)
            nifti_img = nib.Nifti1Image(volume_3d, affine)
            directory = os.path.dirname(f"../raw_data/nii_files/{description}/")
            if not os.path.exists(directory):
                os.makedirs(directory)
            nib.save(nifti_img, f'../raw_data/nii_files/{description}/{str(file)}.nii')

def download_file(file):
    subscription_id = '35068096-667c-4620-8bb2-6b050765fd4e'
    resource_group = 'ComputeFHNW'
    workspace_name = 'FHNW'
    workspace = Workspace(subscription_id, resource_group, workspace_name) 
    datastore = Datastore.get(workspace, "mrialldata")
    dataset = Dataset.File.from_files(path=(datastore, 'output.deid/images/' + file))
    dataset.download(target_path='../raw_data/downloaded_mri_files/' + file, overwrite=True)

### Download all the file paths

In [35]:
subscription_id = '35068096-667c-4620-8bb2-6b050765fd4e'
resource_group = 'ComputeFHNW'
workspace_name = 'FHNW'
workspace = Workspace(subscription_id, resource_group, workspace_name)
datastore = Datastore.get(workspace, "mrialldata")
dataset = Dataset.File.from_files(path=(datastore, 'output.deid/images/'))
mount_context = dataset.mount()
mount_context.start()
files = next(os.walk(mount_context.mount_point))[1]
mount_context.stop()

In [20]:
download_file(str("7606713"))

In [30]:
slices = get_mri_slices_from_file("7606713")

In [31]:
names = [ds.SeriesDescription == "t1_tse_fs_cor" for ds in slices]

In [40]:
np.array(slices)[names][10].pixel_array.max()

1109

In [13]:
names == "t1_tse_fs_cor"

False

In [12]:
names[names == "t1_tse_fs_cor"]

'ep2d_diff_tra_TRACEW'

In [8]:
slices[0].values

<bound method Dataset.values of Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 196
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: MR Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.3.12.2.1107.5.2.41.69518.2019110607375334069215665
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.2.276.0.7230010.3.0.3.6.6
(0002, 0013) Implementation Version Name         SH: 'OFFIS_DCMTK_366'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY', 'DIFFUSION', 'TRACEW', 'NORM', 'DIS2D']
(0008, 0012) Instance Creation Date              DA: '20191106'
(0008, 0013) Instance Creation Time              TM: '073754.152000'
(0008, 0016) SOP Class U

### Download dicom files

In [15]:
### all_files_saved
for file in files:
    download_file(str(file))
    slices = get_mri_slices_from_file(file)
    if len(get_slice_descriptions(slices)[0]) != 5:
        print(file)
    print(get_slice_descriptions(slices))
    save_files(slices,file)
    delete_full_mri_storage(file)

8020686
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_cor', 't2_tse_sag'],
      dtype='<U10'), array([44, 20, 22, 20]))
7729409
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_cor', 't2_tse_sag'],
      dtype='<U10'), array([40, 20, 20, 20]))
7698240
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor'], dtype='<U13'), array([21, 21, 21]))
7757311
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor'], dtype='<U13'), array([20, 20, 20]))
8578318
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor', 't2_tse_fs_sag'],
      dtype='<U13'), array([20, 20, 20, 20]))
8433426
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor', 't2_tse_fs_sag'],
      dtype='<U13'), array([15, 15, 15, 15]))
7947388
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor'], dtype='<U13'), array([15, 15, 15]))
7677484
(array(['t1_tse_cor', 't1_tse_sag', 't2_tse_fs_cor', 't2_tse_fs_sag'],
      dtype='<U13'), array([20, 20, 20, 20]))
