In [1]:
import os
from pathlib import Path
import pydicom
import nibabel as nib
import dicom2nifti
import numpy as np
from matplotlib import pyplot as plt
import importlib
import pickle
import SimpleITK as sitk
from radiomics import featureextractor as extractor
import pandas as pd
import dicom2nifti.settings as settings
settings.disable_validate_slice_increment()
from PIL import Image


import radipop_utils
import radipop_utils.conversion_utils_mw_with_reporting_function as cu
import radipop_utils.features
from radipop_utils.features import convert_and_extract, convert_and_extract2


# Derive the repository root from the location of the radipop_utils package.
# It is needed to find ./yaml/exampleCT.yaml, which holds the settings for the
# radiomics feature extractor.
path = Path(os.path.abspath(radipop_utils.__file__))
RADIPOP_PACKAGE_ROOT = path.parents[1]  # two levels above radipop_utils/__init__.py

In [9]:
# Working directory for every relative path used below (clean_DICOM/, output/,
# the matching spreadsheets). Defaults to the original location; set the
# RADIPOP_DATA_ROOT environment variable to run the notebook on a machine where
# the data lives elsewhere.
DATA_ROOT = os.environ.get("RADIPOP_DATA_ROOT", "E:/radipop/")
os.chdir(DATA_ROOT)

In [10]:
# Spreadsheet pairing each DICOM folder (column `Dicom`) with its mask/patient
# folder (column `Mask`); read relative to the working directory.
matching_file = "matching_3.xlsx"
matching = pd.read_excel(matching_file)

In [11]:
def convert_and_extract(dcm, mask):
    """Convert one DICOM series to PNG/NIfTI and extract liver and spleen radiomics features.

    All paths are relative to the current working directory:

    * input slices are read from ``clean_DICOM/<dcm>``;
    * ``output/<mask>`` must already contain the NIfTI produced by the direct
      DICOM-to-NIfTI conversion (any ``*nii.gz`` whose name does not start with
      ``mask``) as well as ``mask_liver.nii.gz`` and ``mask_spleen.nii.gz``;
    * results go to ``output/<mask>/output_png``: the per-slice PNGs
      (``png/<i>.png``), ``image_from_png.nii.gz``, ``Features_liver.xlsx`` and
      ``Features_spleen.xlsx``.

    Parameters
    ----------
    dcm : str
        Name of the DICOM folder inside ``clean_DICOM``.
    mask : str
        Name of the patient folder inside ``output``.

    Returns
    -------
    bool or None
        ``True`` when both feature files already exist (nothing is recomputed);
        ``None`` after a fresh extraction.

    Raises
    ------
    ValueError
        If the DICOM folder holds no slice carrying ``ImagePositionPatient``.
    FileNotFoundError
        If ``output/<mask>`` holds no converted NIfTI image.
    """
    idx = mask
    print(idx)

    ## define directories
    dicom_dir = os.path.join("clean_DICOM", dcm)
    first_conversion_dir = os.path.join("output", idx)
    output_dir = os.path.join("output", idx, "output_png")
    png_dir = os.path.join(output_dir, "png")

    organs = ("liver", "spleen")

    # Skip the (expensive) work when both result files are already present.
    if all(os.path.isfile(os.path.join(output_dir, f"Features_{organ}.xlsx")) for organ in organs):
        print("Radiomics features already extracted!")
        return True

    # png_dir lives inside output_dir, so this creates both.
    os.makedirs(png_dir, exist_ok=True)

    # Read every file in the folder; force=True also accepts files without a
    # DICOM preamble, so anything lacking a slice position is dropped afterwards.
    files = [pydicom.dcmread(os.path.join(dicom_dir, f), force=True) for f in os.listdir(dicom_dir)]
    files = [x for x in files if "ImagePositionPatient" in dir(x)]
    if not files:
        raise ValueError("No DICOM slices with ImagePositionPatient found in {}".format(dicom_dir))

    # Order slices by their z position; the order is reversed for feet-first-supine scans.
    reverse = files[0].PatientPosition == "FFS"
    files.sort(key=lambda x: float(x.ImagePositionPatient[2]), reverse=reverse)

    # Write one PNG per slice and remember exactly which files were written.
    png_paths = []
    for i, file in enumerate(files):
        orig = cu.extract_pixels_for_viewing(file)
        png_path = os.path.join(png_dir, str(i) + ".png")
        Image.fromarray(orig, mode='L').save(png_path, format='PNG')
        png_paths.append(png_path)

    # Read back only the PNGs written above (already in slice order). Listing the
    # directory instead would also pick up stale PNGs left by an earlier run.
    images = np.array([plt.imread(p) for p in png_paths])
    # Flip axis 0 (slice order) and axis 1 (image rows)
    # -- presumably to match the orientation of the reference NIfTI; TODO confirm.
    images = images[::-1, ::-1, :]

    # convert to image object
    img = sitk.GetImageFromArray(images)

    # The NIfTI produced by the direct DICOM -> NIfTI conversion supplies the
    # geometry (origin, spacing, direction) for the PNG-derived volume.
    nifti_candidates = [x for x in os.listdir(first_conversion_dir)
                        if x.endswith("nii.gz") and not x.startswith("mask")]
    if not nifti_candidates:
        raise FileNotFoundError("No converted NIfTI image found in {}".format(first_conversion_dir))
    fp_nifti_image1 = nifti_candidates[0]
    img.CopyInformation(sitk.ReadImage(os.path.join(first_conversion_dir, fp_nifti_image1)))
    sitk.WriteImage(img, os.path.join(output_dir, "image_from_png.nii.gz"))

    # Load both masks before extracting anything, so a missing mask fails early
    # and no partial result is written.
    organ_masks = {
        organ: sitk.ReadImage(os.path.join(first_conversion_dir, f"mask_{organ}.nii.gz"))
        for organ in organs
    }

    # radiomics feature extraction (same extractor settings for both organs)
    fe = extractor.RadiomicsFeatureExtractor(str(RADIPOP_PACKAGE_ROOT / "yaml" / "exampleCT.yaml"))
    for organ in organs:
        print(f"Extracting {organ} features.")
        features = fe.execute(img, organ_masks[organ])

        # Keep the feature values only; "diagnostics_*" entries are dropped.
        feature_values = {
            key: np.float64(value)
            for key, value in features.items()
            if not key.startswith("diagnostics_")
        }
        features_df = pd.DataFrame(feature_values, index=[0])
        features_df.to_excel(os.path.join(output_dir, f"Features_{organ}.xlsx"))

    print("Feature extraction {} done!".format(output_dir))

In [58]:
# Run the extraction for every DICOM/mask pair of the matching table. Failures
# are collected in `missed` (mask name -> exception) so that one bad case does
# not abort the whole batch.
missed = {}

for dcm, mask in zip(matching["Dicom"], matching["Mask"]):
    try:
        convert_and_extract(dcm, mask)
    except Exception as err:
        print("Extraction failed:", mask)
        print(err)
        missed[mask] = err

V 280
Radiomics features already extracted!
V 260
Radiomics features already extracted!
V 250
Radiomics features already extracted!
V 245
Radiomics features already extracted!
V 237
Radiomics features already extracted!
V 234
Radiomics features already extracted!
V 216
Radiomics features already extracted!
V 188
Radiomics features already extracted!
V 172
Radiomics features already extracted!
V 163
Radiomics features already extracted!
V 158
Radiomics features already extracted!
V 141
Radiomics features already extracted!
V 131
Radiomics features already extracted!
V 129
Radiomics features already extracted!
V 114
Radiomics features already extracted!
V 112
Radiomics features already extracted!
V 109
Radiomics features already extracted!
V 98
Radiomics features already extracted!
V 75
Radiomics features already extracted!
V 73
Radiomics features already extracted!
V 288
Radiomics features already extracted!
V 292
Radiomics features already extracted!
V 297
Radiomics features already ex

In [9]:
# Export the failed cases: one row per mask name together with the exception
# that was raised for it.
missed_ids = missed.keys()
missed_df = pd.DataFrame({"ID": missed_ids, "Message": list(missed.values())})
missed_df.to_excel("Missed_IDs_matching_3.xlsx")

In [12]:
# Same failure log, written as a single "missedID" column indexed by mask name.
missed_table = pd.DataFrame({"missedID": missed})
missed_table.to_excel("Missed_IDs_external_validationcohort.xlsx")

In [55]:
# Hand-picked arguments for a single case (V 10); the names mirror the
# parameters of convert_and_extract2.
dcm, mask = "V.10 Horos", "V 10"
cut_indices = "597_818"  # stays a "first_last" string; convert_and_extract2 parses it itself
flip_mask, flipped_z_axis = True, False
liver_label, spleen_label = 1, 2


In [5]:
def convert_and_extract2(dcm, mask, cut_indices, flipped_z_axis = False, flip_mask = True, liver_label = 1, spleen_label = 2, base_dir = "E://radipop"):
    """Re-run conversion and feature extraction on a sub-range of a DICOM folder.

    Intended for folders that contain more slices than the series the mask was
    drawn on. The slices ``first..last`` (1-based, inclusive) are cut out of the
    DICOM list, written to ``cut_DICOM/<mask>``, converted to PNG and NIfTI, the
    pickled per-slice masks are cut to the same range, and liver/spleen
    radiomics features are extracted. Existing results are always overwritten.

    Parameters
    ----------
    dcm : str
        Name of the DICOM folder inside ``<base_dir>/clean_DICOM``.
    mask : str
        Name of the patient folder; used below ``MASKS``, ``cut_DICOM`` and ``output``.
    cut_indices : str
        ``"first_last"``: 1-based, inclusive slice range. It is applied to the
        DICOM files sorted by ``InstanceNumber`` and to the stacked masks.
    flipped_z_axis : bool
        If true, the slice axis of the PNG-derived volume is reversed once more.
    flip_mask : bool
        If true, the order of the mask slices is reversed before stacking.
    liver_label, spleen_label : int
        Values that mark liver and spleen voxels in the pickled masks.
    base_dir : str
        Root folder that holds ``clean_DICOM``, ``MASKS``, ``cut_DICOM`` and ``output``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no usable DICOM slice is found or the cut range selects nothing.
    FileNotFoundError
        If dicom2nifti produced no NIfTI image in the output folder.
    AssertionError
        If the mask slices are not 512 x 512.
    """
    # 1-based inclusive "first_last" -> Python slice
    cut = [int(x) for x in cut_indices.split("_")]
    slice_range = slice(cut[0] - 1, cut[1])

    idx = mask
    print(idx)

    # input dirs
    dicom_dir = os.path.join(base_dir + "/clean_DICOM", dcm)
    mask_dir = os.path.join(base_dir + "/MASKS/", mask)

    # output dirs for the cut DICOMs and for the NIfTI image / masks
    output_dicom = os.path.join(base_dir + "/cut_DICOM", mask)
    output_mask = os.path.join(base_dir + "/output/", mask)

    # output dirs for the png conversion
    output_dir = os.path.join(base_dir + "/output/", idx, "output_png")
    png_dir = os.path.join(output_dir, "png_cut")

    for directory in (output_dicom, output_mask, png_dir):
        os.makedirs(directory, exist_ok=True)

    # Read every file in the folder; force=True also accepts files without a
    # DICOM preamble, so anything lacking a slice position is dropped afterwards.
    files = [pydicom.dcmread(os.path.join(dicom_dir, f), force=True) for f in os.listdir(dicom_dir)]
    files = [x for x in files if "ImagePositionPatient" in dir(x)]
    if not files:
        raise ValueError("No DICOM slices with ImagePositionPatient found in {}".format(dicom_dir))

    # The cut range refers to the acquisition order (InstanceNumber), reversed
    # for feet-first-supine scans.
    reverse = files[0].PatientPosition == "FFS"
    files.sort(key=lambda x: float(x.InstanceNumber), reverse=reverse)

    files_cut = files[slice_range]
    if not files_cut:
        raise ValueError("cut_indices {!r} select no slices ({} available)".format(cut_indices, len(files)))

    # Write the selected slices as 0.dcm, 1.dcm, ...
    cut_paths = []
    for i, dataset in enumerate(files_cut):
        cut_path = os.path.join(output_dicom, str(i) + ".dcm")
        pydicom.dcmwrite(cut_path, dataset)
        cut_paths.append(cut_path)

    # A previous run with a wider cut leaves higher-numbered files behind; they
    # would be picked up by dicom2nifti below and corrupt the volume geometry.
    for name in os.listdir(output_dicom):
        stem, ext = os.path.splitext(name)
        if ext == ".dcm" and stem.isdecimal() and int(stem) >= len(files_cut):
            os.remove(os.path.join(output_dicom, name))

    # Re-read the slices just written and order them by z position.
    files = [pydicom.dcmread(p, force=True) for p in cut_paths]
    reverse = files[0].PatientPosition == "FFS"
    files.sort(key=lambda x: float(x.ImagePositionPatient[2]), reverse=reverse)

    # Convert to PNG, remembering exactly which files were written.
    png_paths = []
    for i, file in enumerate(files):
        orig = cu.extract_pixels_for_viewing(file)
        png_path = os.path.join(png_dir, str(i) + ".png")
        Image.fromarray(orig, mode='L').save(png_path, format='PNG')
        png_paths.append(png_path)

    # Read back only the PNGs written above (already in slice order), so stale
    # PNGs from an earlier run cannot end up in the volume.
    images = np.array([plt.imread(p) for p in png_paths])
    # Flip axis 0 (slice order) and axis 1 (image rows)
    # -- presumably to match the orientation of the reference NIfTI; TODO confirm.
    images = images[::-1, ::-1, :]

    if flipped_z_axis:
        images = images[::-1, :, :]

    # convert to image object
    img = sitk.GetImageFromArray(images)

    # Convert the cut DICOM series with dicom2nifti; the resulting NIfTI supplies
    # the geometry (origin, spacing, direction) for the PNG-derived volume.
    dicom2nifti.convert_directory(output_dicom, output_mask)

    nifti_candidates = [x for x in os.listdir(output_mask)
                        if x.endswith("nii.gz") and not x.startswith("mask")]
    if not nifti_candidates:
        raise FileNotFoundError("No converted NIfTI image found in {}".format(output_mask))
    # NOTE(review): if output_mask holds more than one image NIfTI (e.g. from an
    # earlier full conversion under a different name) the first listed one is
    # used -- confirm that this is the freshly converted file.
    fp_nifti_image1 = nifti_candidates[0]

    img.CopyInformation(sitk.ReadImage(os.path.join(output_mask, fp_nifti_image1)))
    sitk.WriteImage(img, os.path.join(output_dir, "image_from_png.nii.gz"))

    # Per-slice masks are stored as "<slice number>.p" pickles; "._*" files are skipped.
    mask_files = [x for x in os.listdir(mask_dir) if x.endswith(".p") and not x.startswith("._")]
    mask_files.sort(key=lambda name: int(name[:-len(".p")]))

    masks = []
    for name in mask_files:
        # SECURITY: pickle.load can execute arbitrary code -- only use with trusted mask files.
        with open(os.path.join(mask_dir, name), "rb") as handle:
            masks.append(pickle.load(handle))

    if flip_mask:
        masks = masks[::-1]

    mask_stack = np.stack(masks, axis = 0)
    print(mask_stack.dtype)

    assert mask_stack.shape[1:] == (512,512)

    # Flip the rows of every slice and store the labels as float64.
    mask_volume = mask_stack[:, ::-1, :].astype(np.float64)
    print(mask_volume.dtype)

    # cut the mask to the same slice range as the image
    mask_cut = mask_volume[slice_range]

    # Build and save one binary (0/1) mask per organ, sharing the image geometry.
    organ_labels = {"liver": liver_label, "spleen": spleen_label}
    organ_masks = {}
    for organ, label in organ_labels.items():
        binary = (mask_cut == label).astype(mask_cut.dtype)
        organ_mask = sitk.GetImageFromArray(binary)
        organ_mask.CopyInformation(img)
        sitk.WriteImage(organ_mask, os.path.join(output_mask, f"mask_{organ}.nii.gz"))
        organ_masks[organ] = organ_mask

    # radiomics feature extraction (same extractor settings for both organs)
    fe = extractor.RadiomicsFeatureExtractor(str(RADIPOP_PACKAGE_ROOT / "yaml" / "exampleCT.yaml"))
    for organ, organ_mask in organ_masks.items():
        print(f"Extracting {organ} features.")
        features = fe.execute(img, organ_mask)

        # Keep the feature values only; "diagnostics_*" entries are dropped.
        feature_values = {
            key: np.float64(value)
            for key, value in features.items()
            if not key.startswith("diagnostics_")
        }
        features_df = pd.DataFrame(feature_values, index = [0])
        features_df.to_excel(os.path.join(output_dir, f"Features_{organ}.xlsx"))

    print("Feature extraction {} done!".format(output_dir))

In [12]:
# Keep only the cases that have a slice range recorded in `mask_position`.
has_slice_range = matching["mask_position"].notna()
matching_rerun = matching.loc[has_slice_range]

In [13]:
# Display the rows selected for the re-run.
matching_rerun

Unnamed: 0.1,Unnamed: 0,Dicom,Mask,comment,mask_position
8,560,V.172 horos,V 172,incorrect,364_725
16,569,V.109 horos,V 109,incorrect,493_979
18,571,V.75 horos,V 75,incorrect,104_636
19,572,V.73 horos,V 73,incorrect,386_758
30,585,V.416 horos,V 416,flipped z axis,1_514
43,598,V.38 Horos,V 38,incorrect,696_981
44,599,V.11 horos,V 11,incorrect,104_626
45,600,V.10 Horos,V 10,incorrect,597_818


In [14]:
# Re-run the selected cases with their slice range. `missed` is reset here, so
# it only holds the failures (mask name -> exception) of this re-run.
# NOTE(review): flipped_z_axis / flip_mask are left at their defaults for every
# row, including rows whose `comment` column says "flipped z axis" -- confirm
# that this is intended.
missed = {}

rerun_cases = zip(matching_rerun["Dicom"], matching_rerun["Mask"], matching_rerun["mask_position"])
for dcm, mask, slices in rerun_cases:
    try:
        convert_and_extract2(dcm, mask, slices)
    except Exception as err:
        print("Extraction failed:", mask)
        print(err)
        missed[mask] = err

V 172




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 172\output_png done!
V 109




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 109\output_png done!
V 75




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 75\output_png done!
V 73




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 73\output_png done!
V 416




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 416\output_png done!
V 38




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 38\output_png done!
V 11




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 11\output_png done!
V 10




int64
float64
Extracting liver features.
Extracting spleen features.
Feature extraction E://radipop/output/V 10\output_png done!
