# Data preparation
---
#### Trying Pydicom

In [2]:
%matplotlib inline
import numpy as np
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import pydicom as dicom
from pydicom.data import get_testdata_files

In [6]:
# Root directory that holds one sub-folder per user.
dataset = 'dataset2/'
# Drop the macOS Finder metadata file. Filtering (instead of list.remove)
# does not raise ValueError when '.DS_Store' is not present.
foldernames = [name for name in os.listdir(dataset) if name != '.DS_Store']


In [7]:
def getDicomArray(refDs, lstFiles):
    """Read DICOM files and group their pixel arrays by image shape.

    Args:
        refDs: Reference dataset. Kept so existing callers keep working;
            it is not consulted, because the result is built purely from
            the files in ``lstFiles``.
        lstFiles: Iterable of paths to the DICOM files to read.

    Returns:
        dict mapping ``str(pixel_array.shape)`` to a NumPy array that stacks,
        along a new first axis, every pixel array of that shape in the order
        the files were given. Empty dict when ``lstFiles`` is empty.
    """
    slicesByShape = {}
    # loop through all the DICOM files
    for filenameDCM in lstFiles:
        # dcmread is the replacement for read_file (removed in pydicom 3.0)
        pixels = dicom.dcmread(filenameDCM).pixel_array
        slicesByShape.setdefault(str(pixels.shape), []).append(pixels)
    # Stack each group once; calling np.vstack per file would re-copy the
    # whole accumulated stack on every iteration.
    return {key: np.asarray(slices) for key, slices in slicesByShape.items()}

def extractFeatures(PathDicom):
    """Collect CT and MR pixel data plus metadata for one folder.

    Walks ``PathDicom`` recursively. A file is treated as CT (or MR) when its
    lower-cased name contains ".dcm" and its first two characters are "ct"
    (or "mr"); every other file is ignored.

    Args:
        PathDicom: Root directory to scan.

    Returns:
        Tuple ``(ArrayCT, ArrayMR, metadata)``. ``ArrayCT`` and ``ArrayMR``
        are the results of ``getDicomArray`` for the CT and MR files (``{}``
        when no such file was found). ``metadata`` is the result of
        ``extractMetaData`` for the first CT or MR file encountered (``{}``
        when there is none).
    """
    print("Extracting:", PathDicom)
    # Insertion order (ct, then mr) is also the order the arrays are built in.
    filesByModality = {"ct": [], "mr": []}
    metadata = {}
    metaExtracted = False
    for dirName, _subdirList, fileList in os.walk(PathDicom):
        for filename in fileList:
            lowered = filename.lower()
            if ".dcm" not in lowered:
                continue
            # The modality is taken from the first two characters of the name.
            modalityFiles = filesByModality.get(lowered[:2])
            if modalityFiles is None:
                continue
            fullPath = os.path.join(dirName, filename)
            modalityFiles.append(fullPath)
            # Metadata is read once, from the first CT/MR file seen.
            if not metaExtracted:
                metadata = extractMetaData(fullPath)
                metaExtracted = True

    arrays = {}
    for modality, modalityFiles in filesByModality.items():
        if modalityFiles:
            # dcmread is the replacement for read_file (removed in pydicom 3.0)
            refDs = dicom.dcmread(modalityFiles[0])
            arrays[modality] = getDicomArray(refDs, modalityFiles)
        else:
            arrays[modality] = {}
    return arrays["ct"], arrays["mr"], metadata

def extractMetaData(filenameDCM):
    """Read a fixed set of patient/study attributes from one DICOM file.

    Args:
        filenameDCM: Path to the DICOM file.

    Returns:
        dict mapping each requested attribute name to its value in the
        dataset, or to the string ``'NA'`` when the dataset lacks it.
    """
    extract = (
        'PatientBirthDate',
        'PatientID',
        'PatientName',
        'ImageComments',
        'PatientSex',
        'StudyDate',
        'InstitutionName',
        'ReferringPhysicianName',
    )
    # Only header elements are needed, so stop before the pixel data.
    # dcmread is the replacement for read_file (removed in pydicom 3.0).
    ds = dicom.dcmread(filenameDCM, stop_before_pixels=True)
    return {item: getattr(ds, item, 'NA') for item in extract}

# Build one record per user folder: CT arrays, MR arrays and metadata.
users = []
for user in foldernames:
    path = "./" + dataset + user + "/"
    ct, mr, meta = extractFeatures(path)
    users.append({'ct': ct, 'mr': mr, 'meta': meta})



Extracting: ./dataset2/339663/
Extracting: ./dataset2/345923/
Extracting: ./dataset2/346231/
Extracting: ./dataset2/351423/
Extracting: ./dataset2/353891/
Extracting: ./dataset2/354152/
Extracting: ./dataset2/361113/
Extracting: ./dataset2/362946/
Extracting: ./dataset2/363578/
Extracting: ./dataset2/363740/
Extracting: ./dataset2/386348/
Extracting: ./dataset2/386661/
Extracting: ./dataset2/386662/
Extracting: ./dataset2/386800/
Extracting: ./dataset2/386801/


## All users are stored in the variable `users`

In [8]:
len(users)

15

### Writing visualization code below

In [None]:
# Preview a CT slice. ``ArrayCT`` only exists inside extractFeatures, so the
# data is taken from ``users`` instead. Each 'ct' entry is a dict that maps
# a shape string to the stacked slices of that shape.
# NOTE(review): previewing the first user and the first shape group is an
# assumption -- adjust the selection as needed.
ctGroups = users[0]['ct'] if users else {}
if ctGroups:
    ctVolume = next(iter(ctGroups.values()))
    # Clamp the upper bound so short volumes do not raise IndexError.
    for i in range(50, min(77, len(ctVolume))):
        f = plt.figure()
        ax = f.gca()
        ax.imshow(ctVolume[i])
        f.canvas.draw()
        break  # kept from the original: only the first slice in range is drawn