In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import SimpleITK as sitk
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from IPython.display import clear_output

In [2]:
def read_kImage(path):
    """Load the image file at ``path`` and return its voxels as a NumPy array.

    The file is read with ``sitk.ReadImage`` and converted with
    ``sitk.GetArrayFromImage``. No axis reordering or rescaling happens here;
    according to the original author's note the resulting axes are z, y, x.
    """
    return sitk.GetArrayFromImage(sitk.ReadImage(path))

In [3]:
def extract_voxel_data(series_path):
    """Read the DICOM series stored in ``series_path`` as one NumPy volume.

    The file names of the series are discovered with
    ``GetGDCMSeriesFileNames`` and handed to a ``sitk.ImageSeriesReader``;
    the image it produces is converted with ``sitk.GetArrayFromImage``.
    """
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(series_reader.GetGDCMSeriesFileNames(series_path))
    return sitk.GetArrayFromImage(series_reader.Execute())

In [4]:
def normalize_image(image_3D):
    """Rescale every slice of a 3-D volume independently to the range [-1, 1].

    Parameters
    ----------
    image_3D : array-like with three axes
        The first axis is iterated slice by slice.

    Returns
    -------
    numpy.ndarray (float64) with the same shape as the input. In each slice
    the minimum maps to -1 and the maximum to 1. A slice whose values are all
    equal has no range to stretch; it is filled with -1 (the value the linear
    map assigns to the minimum) instead of NaN/inf from a division by zero.
    """
    target_min = -1
    target_max = 1
    # Work in float so that `slice - min` cannot overflow narrow integer dtypes.
    volume = np.asarray(image_3D, dtype=np.float64)
    z, y, x = volume.shape
    new_image_3D = np.empty(shape=(z, y, x))
    for i in range(z):
        min_value = volume[i].min()
        value_range = volume[i].max() - min_value
        if value_range == 0:
            # Constant slice: avoid dividing by zero.
            new_image_3D[i] = target_min
            continue
        scale = float(target_max - target_min) / value_range
        new_image_3D[i] = (volume[i] - min_value) * scale + target_min
    return new_image_3D

In [5]:
def subvoxel (img_3d,ijk, window):
    """Cut a 3-slice square patch centred on a finding out of a volume.

    Parameters
    ----------
    img_3d : numpy.ndarray with axes (slice, row, column)
    ijk : str
        Whitespace separated "x y z" voxel position. A negative z is used by
        its absolute value (kept from the original implementation).
    window : int
        Requested side length; the patch side is ``2 * (window // 2)``.

    Returns
    -------
    numpy.ndarray (float64) of shape ``(3, side, side)`` holding slices
    z-1, z, z+1 and rows/columns ``[c - window//2, c + window//2)``.

    Indices that fall outside the volume are clamped to the nearest valid
    index (edge replication). For z this is what the previous code already
    did; for x/y the previous code let a negative start wrap around, which
    produced empty or truncated patches near the image border.

    Raises
    ------
    IndexError
        If ``abs(z)`` is not a valid slice index of ``img_3d``.
    """
    x, y, z = (int(token) for token in ijk.split()[:3])
    z = abs(z)
    volume = np.asarray(img_3d)
    depth, height, width = volume.shape
    if z > depth - 1:
        raise IndexError(
            "slice index {} is out of bounds for a volume with {} slices".format(z, depth)
        )
    half = window // 2
    # Clamping the index vectors replicates the border voxels where the
    # window sticks out of the volume, so the output shape is always the same.
    z_idx = np.clip(np.arange(z - 1, z + 2), 0, depth - 1)
    y_idx = np.clip(np.arange(y - half, y + half), 0, height - 1)
    x_idx = np.clip(np.arange(x - half, x + half), 0, width - 1)
    return volume[np.ix_(z_idx, y_idx, x_idx)].astype(np.float64)

In [6]:
def generate_training_dataframes(basedir="../1/train/ProstateX-TrainingLesionInformationv2/ProstateX-TrainingLesionInformationv2"):
    """Build the training tables for the MRI series and for the Ktrans maps.

    Parameters
    ----------
    basedir : str, optional
        Directory holding ``ProstateX-Findings-Train.csv``,
        ``ProstateX-Images-Train.csv`` and
        ``ProstateX-Images-KTrans-Train.csv``. Defaults to the location the
        notebook has always used.

    Returns
    -------
    (join_images_table, join_kimages_table)
        The findings table inner-joined on ``ProxID``/``fid`` with the images
        table and with the Ktrans table respectively. Both get placeholder
        columns that later cells fill in: the volume, its patch, ``State`` and
        ``Predictions`` (plus ``InterestSequence`` for the MRI table).
    """
    def _placeholder_volumes(table):
        # One 2x2x2 nested list of zeros per row, so the column can hold objects.
        return pd.Series(np.zeros((len(table), 2, 2, 2)).tolist(), index=table.index)

    # Load the three base tables from the .csv files.
    findings_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Findings-Train.csv"))
    images_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Images-Train.csv"))
    kimages_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Images-KTrans-Train.csv"))

    # Inner join of the findings with each image table.
    join_kimages_table = pd.merge(left=findings_table, right=kimages_table, how="inner", on=["ProxID", "fid"])
    join_images_table = pd.merge(left=findings_table, right=images_table, how="inner", on=["ProxID", "fid"])

    # Columns for the image, the patch, the data-integrity flag and the predictions.
    join_images_table["MRI3D"] = _placeholder_volumes(join_images_table)
    join_images_table["MRIPatch3D"] = _placeholder_volumes(join_images_table)
    join_images_table["State"] = pd.Series(False, index=join_images_table.index)
    join_images_table["InterestSequence"] = pd.Series(False, index=join_images_table.index)
    join_images_table["Predictions"] = pd.Series(0, index=join_images_table.index)

    join_kimages_table["Kimg3D"] = _placeholder_volumes(join_kimages_table)
    join_kimages_table["KimgPatch3D"] = _placeholder_volumes(join_kimages_table)
    join_kimages_table["State"] = pd.Series(False, index=join_kimages_table.index)
    join_kimages_table["Predictions"] = pd.Series(0, index=join_kimages_table.index)
    return join_images_table, join_kimages_table

In [7]:
def generate_testing_dataframes(basedir="../1/test/ProstateX-TestLesionInformation/ProstateX-TestLesionInformation"):
    """Build the test tables for the MRI series and for the Ktrans maps.

    Parameters
    ----------
    basedir : str, optional
        Directory holding ``ProstateX-Findings-Test.csv``,
        ``ProstateX-Images-Test.csv`` and ``ProstateX-Images-KTrans-Test.csv``.
        Defaults to the location the notebook has always used.

    Returns
    -------
    (join_images_table, join_kimages_table)
        The findings table inner-joined on ``ProxID``/``fid`` with the images
        table and with the Ktrans table respectively, each extended with
        placeholder columns for the volume, its patch and the ``State`` flag
        (plus ``InterestSequence`` for the MRI table).
    """
    def _placeholder_volumes(table):
        # One 2x2x2 nested list of zeros per row, so the column can hold objects.
        return pd.Series(np.zeros((len(table), 2, 2, 2)).tolist(), index=table.index)

    # Load the three base tables from the .csv files.
    findings_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Findings-Test.csv"))
    images_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Images-Test.csv"))
    kimages_table = pd.read_csv("{}/{}".format(basedir, "ProstateX-Images-KTrans-Test.csv"))

    # Inner join of the findings with each image table.
    join_kimages_table = pd.merge(left=findings_table, right=kimages_table, how="inner", on=["ProxID", "fid"])
    join_images_table = pd.merge(left=findings_table, right=images_table, how="inner", on=["ProxID", "fid"])

    # Placeholder columns that the loader cells fill in later.
    join_images_table["MRI3D"] = _placeholder_volumes(join_images_table)
    join_images_table["MRIPatch3D"] = _placeholder_volumes(join_images_table)
    join_images_table["State"] = pd.Series(False, index=join_images_table.index)
    join_images_table["InterestSequence"] = pd.Series(False, index=join_images_table.index)

    join_kimages_table["Kimg3D"] = _placeholder_volumes(join_kimages_table)
    join_kimages_table["KimgPatch3D"] = _placeholder_volumes(join_kimages_table)
    join_kimages_table["State"] = pd.Series(False, index=join_kimages_table.index)
    return join_images_table, join_kimages_table

In [8]:
def isValid_data(img_3d, ijk):
    """Tell whether the slice index of a finding is unusable for this volume.

    NOTE: despite the name, the return value is True when the finding must be
    DISCARDED and False when it can be used; callers store it in the ``State``
    column and keep the rows where it is False.

    Parameters
    ----------
    img_3d : numpy.ndarray
        Volume whose first axis is the slice axis.
    ijk : str
        Whitespace separated voxel position; the third token is the slice index.

    Returns
    -------
    bool
        True when the slice index is greater than or equal to the last slice
        index, or when it is negative with a magnitude beyond the last slice.
        The second case used to be reported as usable, although ``subvoxel``
        indexes with the absolute value and would then raise IndexError.
    """
    max_bandwith = img_3d.shape[0] - 1
    possible_k = int(ijk.split()[2])
    if possible_k >= max_bandwith or -possible_k > max_bandwith:
        print("The ijk requested is [{}], there was an error since the shapes of the image is {}".format(ijk,str(img_3d.shape)))
        return True
    print("The ijk requested is [{}],OK since shapes of the image is {}".format(ijk,str(img_3d.shape)))
    return False

In [9]:
def set_MRI_value(dataFrame,index,image3d,state,window):
    """Store an MRI volume, its validity flag and its patch in one table row.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table with ``ijk``, ``MRI3D``, ``State`` and ``MRIPatch3D`` columns;
        modified in place.
    index : int
        Row to fill. It is used both as a position and as an index label, so
        the table is expected to carry a default 0..n-1 index.
    image3d : numpy.ndarray
        Volume to store in ``MRI3D``.
    state : bool
        Value produced by ``isValid_data``: False means the finding is usable.
    window : int
        Side length forwarded to ``subvoxel``.

    For usable rows the patch is cut out with ``subvoxel``. For unusable rows
    a zero patch of the same shape a real patch would have,
    ``(3, 2*(window//2), 2*(window//2))``, is stored; the previous placeholder
    had the unrelated shape ``(window, depth, height)``.
    """
    dataFrame.at[index,"MRI3D"] = image3d
    dataFrame.at[index,"State"] = state
    if not state:
        patch = subvoxel(img_3d=image3d, ijk=dataFrame["ijk"].iloc[index], window=window)
    else:
        side = 2 * (window // 2)
        patch = np.zeros(shape=(3, side, side))
    dataFrame.at[index,"MRIPatch3D"] = patch
    

In [10]:
def set_kImg_value(dataFrame,index,image3d,state, window):
    """Store a Ktrans volume, its validity flag and its patch in one table row.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Table with ``ijk``, ``Kimg3D``, ``State`` and ``KimgPatch3D`` columns;
        modified in place.
    index : int
        Row to fill. It is used both as a position and as an index label, so
        the table is expected to carry a default 0..n-1 index.
    image3d : numpy.ndarray
        Volume to store in ``Kimg3D``.
    state : bool
        Value produced by ``isValid_data``: False means the finding is usable.
    window : int
        Side length forwarded to ``subvoxel``.

    For usable rows the patch is cut out with ``subvoxel``. For unusable rows
    a zero patch of shape ``(3, 2*(window//2), 2*(window//2))`` is stored; the
    previous placeholder was an uninitialised ``np.empty`` array with the
    shape of the whole volume.
    """
    dataFrame.at[index,"Kimg3D"] = image3d
    dataFrame.at[index,"State"] = state
    if not state:
        patch = subvoxel(img_3d=image3d, ijk=dataFrame["ijk"].iloc[index], window=window)
    else:
        side = 2 * (window // 2)
        patch = np.zeros(shape=(3, side, side))
    dataFrame.at[index,"KimgPatch3D"] = patch
    

In [11]:
def fill_ktrans_images_dataframe_at_training(dataFrame, window, base_path="../1/train/ProstateXKtrains-train-fixed"):
    """Load the training Ktrans volume of every row and store it with its patch.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Ktrans table with ``ProxID`` and ``ijk`` columns; filled in place
        through ``set_kImg_value``.
    window : int or None
        Patch side length; ``None`` selects the default of 40.
    base_path : str, optional
        Directory containing one ``<ProxID>/<ProxID>-Ktrans.mhd`` file per
        patient. Defaults to the location the notebook has always used.
    """
    if window is None:
        window = 40
    for row in range(len(dataFrame)):
        patient_id = dataFrame.ProxID.iloc[row]
        ijk = dataFrame.ijk.iloc[row]
        path = "{}/{}/{}-Ktrans.mhd".format(base_path, patient_id, patient_id)
        # Each volume is normalised slice by slice before validation/patching.
        kimage_3d = normalize_image(read_kImage(path))
        state = isValid_data(kimage_3d, ijk)
        set_kImg_value(dataFrame, row, kimage_3d, state, window)
        print("Se ha cargado la k-trans imagen3d #{} para el paciente {} ".format(row,patient_id))


In [12]:
def fill_ktrans_images_dataframe_at_testing(dataFrame, window, base_path="../1/test/ProstateXKtrans-test-fixedv2/ProstateXKtrans-test-fixedv2"):
    """Load the test Ktrans volume of every row and store it with its patch.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Ktrans table with ``ProxID`` and ``ijk`` columns; filled in place
        through ``set_kImg_value``.
    window : int or None
        Patch side length; ``None`` selects the default of 40, matching the
        training loader (previously ``None`` failed later inside ``subvoxel``).
    base_path : str, optional
        Directory containing one ``<ProxID>/<ProxID>-Ktrans.mhd`` file per
        patient. Defaults to the location the notebook has always used.
    """
    if window is None:
        window = 40
    for row in range(len(dataFrame)):
        patient_id = dataFrame.ProxID.iloc[row]
        ijk = dataFrame.ijk.iloc[row]
        path = "{}/{}/{}-Ktrans.mhd".format(base_path, patient_id, patient_id)
        # Each volume is normalised slice by slice before validation/patching.
        kimage_3d = normalize_image(read_kImage(path))
        state = isValid_data(kimage_3d, ijk)
        set_kImg_value(dataFrame, row, kimage_3d, state, window)
        print("Se ha cargado la k-trans imagen3d #{} para el paciente {} ".format(row,patient_id))


In [13]:
# Build the training tables (MRI series and Ktrans maps) with placeholder image columns.
training_MRI_imgs_dataframe, training_ktrans_imgs_dataframe = generate_training_dataframes()
# The test split is not built in this run.
#testing_MRI_imgs_dataframe, testing_ktrans_imgs_dataframe = generate_testing_dataframes()

In [14]:
# Side length requested for the patches cut around every finding.
window = 40
fill_ktrans_images_dataframe_at_training(dataFrame = training_ktrans_imgs_dataframe , window =window)
#fill_ktrans_images_dataframe_at_testing(dataFrame = testing_ktrans_imgs_dataframe, window = window)
clear_output()
print ("All the Ktrans training images were read correctly")
# NOTE(review): the test loader above is commented out, so nothing backs the
# next message in this run; it is kept only to match the recorded output.
print ("All the Ktrans testing images were read correctly")
# Keep the usable rows (State == False) and renumber them 0..n-1. The result of
# reset_index must be assigned: the call does not modify the frame in place.
training_ktrans_imgs_dataframe = training_ktrans_imgs_dataframe[ training_ktrans_imgs_dataframe.State == False].reset_index(drop=True)
display(training_ktrans_imgs_dataframe[["ProxID", "fid", "ijk", "ClinSig"]].iloc[35:60])

All the Ktrans training images were read correctly
All the Ktrans testing images were read correctly


Unnamed: 0,ProxID,fid,ijk,ClinSig
35,ProstateX-0025,1,67 88 -7,False
36,ProstateX-0025,1,75 79 7,False
37,ProstateX-0025,1,81 77 7,False
38,ProstateX-0025,1,67 88 -7,False
39,ProstateX-0025,1,75 79 7,False
40,ProstateX-0025,1,81 77 7,False
41,ProstateX-0025,2,57 89 -7,False
42,ProstateX-0025,2,71 79 7,False
43,ProstateX-0025,3,46 80 -7,False
44,ProstateX-0025,3,60 70 8,False


In [15]:
def fill_MRI_images_dataframe_at_training(dataFrame, window):
    """Load the training DICOM series of every row and store it with its patch.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        MRI table with ``ProxID``, ``DCMSerDescr``, ``DCMSerNum``, ``ijk`` and
        ``Name`` columns; filled in place through ``set_MRI_value``.
    window : int or None
        Patch side length; ``None`` selects 40. The argument used to be
        ignored in favour of a hard-coded 40.

    Raises
    ------
    FileNotFoundError
        If no series folder of the patient starts with the row's series number
        (previously the last folder listed was used silently).
    """
    if window is None:
        window = 40
    for row in range (len(dataFrame)):
        print("===============================================================")
        patient_id = dataFrame.ProxID.iloc[row]
        series_name = dataFrame.DCMSerDescr.iloc[row]
        series_id = dataFrame.DCMSerNum.iloc[row]
        slice_ijk = dataFrame.ijk.iloc[row]
        sequence_name = dataFrame.Name.iloc[row]
        path = "../1/train/PROSTATEx/{}/".format(patient_id)#Selects the Patient ID
        path = "{}/{}".format(path,os.listdir(path)[0])#Selects the default folder inside the patient
        series_prefix = str(series_id)
        selected = None
        for sequence in os.listdir(path): # Iterates over all the sequences
            # The folder must start with the series number and the number must
            # end there; otherwise series "1" would also select "10-...".
            # NOTE(review): assumes the number is followed by a non-digit
            # separator in the folder name - confirm against the dataset.
            following = sequence[len(series_prefix):len(series_prefix) + 1]
            if sequence.startswith(series_prefix) and not following.isdigit():
                print ("The condition is true {} = {}".format(series_prefix,series_id))
                print ("The sequence selected is {}".format(sequence))
                print ("The requested sequence is {}".format(sequence_name))
                selected = sequence
                break
        if selected is None:
            raise FileNotFoundError(
                "No series folder starting with {} was found in {}".format(series_prefix, path)
            )
        path = "{}/{}".format(path,selected)
        image_3d = extract_voxel_data(path)
        state = isValid_data(ijk=slice_ijk, img_3d=image_3d)
        set_MRI_value(dataFrame,row,image_3d,state,window)
        print("Se ha cargado la imagen MRI3D y MRI3DPATCH #{} para el paciente {} serie: {} ".format(row,patient_id,series_name))

In [16]:
def fill_MRI_images_dataframe_at_testing(dataFrame, window):
    """Load the test DICOM series of every row and store it with its patch.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        MRI table with ``ProxID``, ``DCMSerDescr``, ``DCMSerNum``, ``ijk`` and
        ``Name`` columns; filled in place through ``set_MRI_value``.
    window : int or None
        Patch side length; ``None`` selects 40. The argument used to be
        ignored in favour of a hard-coded 40.

    Raises
    ------
    FileNotFoundError
        If no series folder of the patient starts with the row's series number
        (previously the last folder listed was used silently).
    """
    if window is None:
        window = 40
    for row in range (len(dataFrame)):
        print("===============================================================")
        patient_id = dataFrame.ProxID.iloc[row]
        series_name = dataFrame.DCMSerDescr.iloc[row]
        sequence_name = dataFrame.Name.iloc[row]
        series_id = dataFrame.DCMSerNum.iloc[row]
        slice_ijk = dataFrame.ijk.iloc[row]
        path = "../1/test/PROSTATEx/{}".format(patient_id)#Selects the Patient ID
        path = "{}/{}".format(path,os.listdir(path)[0])#Selects the default folder inside the patient
        series_prefix = str(series_id)
        selected = None
        for sequence in os.listdir(path): # Iterates over all the sequences
            # The folder must start with the series number and the number must
            # end there; otherwise series "1" would also select "10-...".
            # NOTE(review): assumes the number is followed by a non-digit
            # separator in the folder name - confirm against the dataset.
            following = sequence[len(series_prefix):len(series_prefix) + 1]
            if sequence.startswith(series_prefix) and not following.isdigit():
                print ("The condition is true {} = {}".format(series_prefix,series_id))
                print ("The sequence selected is {}".format(sequence))
                print ("The requested sequence is {}".format(sequence_name))
                selected = sequence
                break
        if selected is None:
            raise FileNotFoundError(
                "No series folder starting with {} was found in {}".format(series_prefix, path)
            )
        path = "{}/{}".format(path,selected)
        image_3d = extract_voxel_data(path)
        state = isValid_data(ijk=slice_ijk, img_3d=image_3d)
        set_MRI_value(dataFrame,row,image_3d,state,window)
        print("Se ha cargado la imagen MRI3D y MRI3DPATCH #{} para el paciente {} serie: {} ".format(row,patient_id,series_name))

In [17]:
fill_MRI_images_dataframe_at_training(dataFrame = training_MRI_imgs_dataframe, window = window)
#fill_MRI_images_dataframe_at_testing(dataFrame = testing_MRI_imgs_dataframe, window = window)
clear_output()
print ("All the MRI training images were read correctly")
# NOTE(review): the test loader above is commented out, so nothing backs the
# next message in this run; it is kept only to match the recorded output.
print ("All the MRI testing images were read correctly")
# Keep the usable rows (State == False) and renumber them 0..n-1. The result of
# reset_index must be assigned: the call does not modify the frame in place.
training_MRI_imgs_dataframe = training_MRI_imgs_dataframe[ training_MRI_imgs_dataframe.State == False].reset_index(drop=True)
display(training_MRI_imgs_dataframe[["ProxID", "fid", "ijk", "ClinSig"]].iloc[35:60])


All the MRI training images were read correctly
All the MRI testing images were read correctly


Unnamed: 0,ProxID,fid,ijk,ClinSig
35,ProstateX-0002,2,45 70 9,False
36,ProstateX-0002,2,134 137 1,False
37,ProstateX-0002,2,153 151 0,False
38,ProstateX-0002,2,170 186 12,False
39,ProstateX-0002,2,178 169 8,False
40,ProstateX-0002,2,203 216 10,False
41,ProstateX-0002,2,68 72 8,False
42,ProstateX-0003,1,54 68 11,False
43,ProstateX-0003,1,54 68 11,False
44,ProstateX-0003,1,54 68 11,False


In [18]:
def setup_MRI_series_descriptions():
    """Normalise ``DCMSerDescr`` of the global ``training_MRI_imgs_dataframe``.

    Every description is lower-cased and its spaces and hyphens are turned
    into underscores, in place; the distinct results are printed afterwards.
    Rows are addressed by position and written by label, so the frame is
    expected to carry a 0..n-1 index.
    """
    frame = training_MRI_imgs_dataframe
    separators = str.maketrans(" -", "__")
    for position in range(len(frame)):
        raw_description = frame.DCMSerDescr.iloc[position]
        frame.at[position, "DCMSerDescr"] = str.lower(raw_description).translate(separators)
    print(frame.DCMSerDescr.unique())
setup_MRI_series_descriptions()

['ep2d_diff_tra_dyndist_adc' 'ep2d_diff_tra_dyndist'
 'ep2d_diff_tra_dyndistcalc_bval' 't2_tse_cor' 't2_tse_sag' 't2_tse_tra'
 'tfl_3d_pd_ref_tra_1.5x1.5_t3' 'ep2d_diff_tra_dyndist_mix_adc'
 'ep2d_diff_tra_dyndist_mix' 'ep2d_diff_tra_dyndist_mixcalc_bval'
 't2_localizer' 't2_loc_sag' 't2_tse_tra_exacte_copy_diffusie'
 't2_loc_tra' 'adc_s3_1' 'adc_s3_2'
 'ep2d_diff_tra2x2_noise0_fs_dyndist_adc'
 'ep2d_diff_tra2x2_noise0_fs_dyndist'
 'ep2d_diff_tra2x2_noise0_fs_dyndistcalc_bval'
 'ep2d_diff_tra2x2_noise0_nofs_dyndist_adc'
 'ep2d_diff_tra2x2_noise0_nofs_dyndist'
 'ep2d_diff_tra2x2_noise0_nofs_dyndistcalc_bval'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=10.2s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=100.5s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=101.7s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=103.0s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=104.2s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=105.5s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=106.8s'
 'perfusie_t1_twist_1.3x1.3x3_temp_2s_tt=10

In [19]:

def get_dataframe_given_zone(zone, dataFrame):
    """Return the rows of ``dataFrame`` whose ``zone`` column equals ``zone``."""
    in_zone = dataFrame["zone"] == zone
    return dataFrame.loc[in_zone]

In [20]:
def get_data_labels (dataFrame):
    """Flatten the MRI patches of a table into a feature matrix with labels.

    Parameters
    ----------
    dataFrame : pandas.DataFrame
        Needs a ``MRIPatch3D`` column of equally sized arrays and a
        ``ClinSig`` column of labels.

    Returns
    -------
    (all_data, all_labels)
        ``all_data`` is a float array with one flattened patch per row and
        ``all_labels`` the labels converted to float. The number of features
        is taken from the first patch, so any patch size works; it used to be
        hard-coded to 3*40*40, which is still the width returned for an empty
        table. Rows are read by position, so duplicate index labels are fine.

    Raises
    ------
    ValueError
        If a patch does not have the same number of elements as the first one.
    """
    patches = dataFrame.MRIPatch3D
    labels = dataFrame.ClinSig
    if len(patches) == 0:
        return np.empty(shape=(0, 3 * 40 * 40)), np.zeros(shape=(0,))
    number_of_features = np.asarray(patches.iloc[0]).size
    all_data = np.empty(shape=(len(patches), number_of_features))
    for position, patch in enumerate(patches):
        all_data[position, :] = np.asarray(patch).reshape(-1)
    all_labels = labels.to_numpy().astype(float)
    return all_data, all_labels

In [21]:
def apply_Kfold_cross_validation(data, labels, classifier, kfolds):
    """Cross-validate ``classifier`` on ``data``/``labels`` and report the scores.

    The per-fold scores from ``cross_val_score`` (with ``cv=kfolds``) are
    printed together with their mean and twice their standard deviation.

    Returns
    -------
    (scores, mean * 100, std * 100)
        The raw per-fold scores, then mean and standard deviation as
        percentages.
    """
    from sklearn.model_selection import cross_val_score

    fold_scores = cross_val_score(classifier, data, labels, cv=kfolds)
    average = fold_scores.mean()
    spread = fold_scores.std()
    print(fold_scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (average, spread * 2))
    return fold_scores, average * 100, spread * 100

In [22]:

def create_training_file(dataframe, FileName):
    """Write the patches of ``dataframe`` to ``<FileName>.txt``, one sample per line.

    Each line holds the integer label followed by ``position:value`` pairs
    (positions start at 1) for every feature returned by ``get_data_labels``.
    Every line, including the last one, is terminated by a newline.
    """
    FileName = FileName + ".txt"
    data, labels = get_data_labels(dataframe)
    with open(FileName, "w") as text_file:
        for sample, label in zip(data, labels):
            fields = "".join(
                " {}:{}".format(position, value)
                for position, value in enumerate(sample, start=1)
            )
            text_file.write("{} {}".format(str(int(label)), fields))
            text_file.write("\n")
    print("The file {} was succesfully created".format(FileName))

In [23]:
def plot_ROC_curve(classifier, data_test, labels_test):
    """Plot the ROC curve of a fitted classifier on a test set.

    The scores are the second column of ``classifier.predict_proba``; the
    curve and its area come from ``sklearn.metrics``. The figure also shows
    the diagonal as a dashed red reference line.
    """
    from sklearn import metrics
    import matplotlib.pyplot as plt

    positive_scores = classifier.predict_proba(data_test)[:, 1]
    fpr, tpr, threshold = metrics.roc_curve(labels_test, positive_scores)
    roc_auc = metrics.auc(fpr, tpr)

    plt.figure(figsize=(10, 10))
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

In [24]:
# Select series by substring of their description.
def return_MRI_serie_of_interest(serie_of_interest):
    """Return the rows of the global MRI table whose description contains a text.

    ``InterestSequence`` of every row of ``training_MRI_imgs_dataframe`` is
    overwritten in place with whether ``DCMSerDescr`` contains
    ``serie_of_interest``; the rows where it is True are returned.
    """
    frame = training_MRI_imgs_dataframe
    for position in range(len(frame)):
        description = frame.DCMSerDescr.iloc[position]
        frame.at[position, "InterestSequence"] = description.find(serie_of_interest) != -1
    return frame[frame.InterestSequence == True]

In [25]:
# One table per series of interest, each renumbered 0..n-1.
# reset_index returns a new frame, so its result has to be assigned; the
# previous bare calls had no effect.

#DWI-ADC
serie_of_interest = "adc"
dyndist_adc_dataframe = return_MRI_serie_of_interest(serie_of_interest).reset_index(drop=True)

#t2_tse_sag
serie_of_interest = "t2_tse_sag"
t2_tse_sag_dataframe = return_MRI_serie_of_interest(serie_of_interest).reset_index(drop=True)

#t2_tse_tra
serie_of_interest = "t2_tse_tra"
t2_tse_tra_dataframe = return_MRI_serie_of_interest(serie_of_interest).reset_index(drop=True)



In [26]:
# Distinct "patient - finding" pairs present in the Ktrans table, grouped by
# patient in order of first appearance.
dataframe = training_ktrans_imgs_dataframe
print("There is a total of {} rows".format(len(dataframe)))
list_of_k_findings = [
    "{} - {}".format(patient_id, fid)
    for patient_id in dataframe.ProxID.unique()
    for fid in dataframe.loc[dataframe.ProxID == patient_id, "fid"].unique()
]
i = len(list_of_k_findings)
print("There are {} findings".format(i))

There is a total of 339 rows
There are 325 findings


In [27]:
# Distinct "patient - finding" pairs present in the ADC table, grouped by
# patient in order of first appearance.
dataframe = dyndist_adc_dataframe
print("There is a total of {} rows".format(len(dataframe)))
list_of_adc_findings = [
    "{} - {}".format(patient_id, fid)
    for patient_id in dataframe.ProxID.unique()
    for fid in dataframe.loc[dataframe.ProxID == patient_id, "fid"].unique()
]
i = len(list_of_adc_findings)
print("There are {} findings".format(i))

There is a total of 345 rows
There are 323 findings


In [28]:
# Distinct "patient - finding" pairs present in the t2_tse_tra table, grouped
# by patient in order of first appearance.
dataframe = t2_tse_tra_dataframe
print("There is a total of {} rows".format(len(dataframe)))
list_of_t2_tra_findings = [
    "{} - {}".format(patient_id, fid)
    for patient_id in dataframe.ProxID.unique()
    for fid in dataframe.loc[dataframe.ProxID == patient_id, "fid"].unique()
]
i = len(list_of_t2_tra_findings)
print("There are {} findings".format(i))

There is a total of 415 rows
There are 325 findings


In [29]:
# Distinct "patient - finding" pairs present in the t2_tse_sag table, grouped
# by patient in order of first appearance.
dataframe = t2_tse_sag_dataframe
print("There is a total of {} rows".format(len(dataframe)))
list_of_t2_sag_findings = [
    "{} - {}".format(patient_id, fid)
    for patient_id in dataframe.ProxID.unique()
    for fid in dataframe.loc[dataframe.ProxID == patient_id, "fid"].unique()
]
i = len(list_of_t2_sag_findings)
print("There are {} findings".format(i))

There is a total of 363 rows
There are 324 findings


In [30]:
# Ktrans findings that are missing from at least one MRI series. The series are
# checked in the order ADC, t2_tse_sag, t2_tse_tra and every finding is listed once.
list_of_elements_tobe_deleted = []
for reference_findings in (list_of_adc_findings, list_of_t2_sag_findings, list_of_t2_tra_findings):
    for finding in list_of_k_findings:
        already_listed = finding in list_of_elements_tobe_deleted
        if finding not in reference_findings and not already_listed:
            list_of_elements_tobe_deleted.append(finding)

In [31]:
# Show the Ktrans findings that are absent from at least one of the MRI series.
list_of_elements_tobe_deleted

['ProstateX-0080 - 1',
 'ProstateX-0140 - 2',
 'ProstateX-0013 - 1',
 'ProstateX-0130 - 1',
 'ProstateX-0172 - 1']

In [32]:
# Flag the Ktrans rows of the findings that have to be removed.
dataframe = training_ktrans_imgs_dataframe
index_to_delete = []
for finding in list_of_elements_tobe_deleted:
    ProxID,fid = finding.split(" - ")
    dataframe2 = dataframe[(dataframe.ProxID == ProxID) & (dataframe.fid == int(fid)) ]
    # A finding can own several rows; all of them are collected. Taking only
    # the first match left the remaining rows of the finding in the table.
    for index in dataframe2.index:
        index_to_delete.append(index)
        print(index)
    dataframe.loc[dataframe2.index, "State"] = True

119
223
21
200
282


In [33]:
# Drop the flagged rows and renumber the rest 0..n-1. reset_index returns a
# new frame, so it is chained into the assignment instead of being called bare.
training_ktrans_imgs_dataframe = training_ktrans_imgs_dataframe.drop(index_to_delete).reset_index(drop=True)

In [34]:
# Sanity check: list the Ktrans rows still flagged with State == True (the recorded output is empty).
training_ktrans_imgs_dataframe[training_ktrans_imgs_dataframe.State == True][["ProxID","fid","State"]]

Unnamed: 0,ProxID,fid,State


In [35]:
# Flag the ADC rows of the findings that have to be removed.
dataframe = dyndist_adc_dataframe
index_to_delete = []
for finding in list_of_elements_tobe_deleted:
    ProxID,fid = finding.split(" - ")
    dataframe2 = dataframe[(dataframe.ProxID == ProxID) & (dataframe.fid == int(fid)) ]
    # A finding can own several rows; all of them are collected. Taking only
    # the first match left the remaining rows of the finding in the table.
    for index in dataframe2.index:
        print(index)
        index_to_delete.append(index)
    dataframe.loc[dataframe2.index, "State"] = True

21
201
282


In [36]:
# Drop the flagged rows and renumber the rest 0..n-1. reset_index returns a
# new frame, so it is chained into the assignment instead of being called bare.
dyndist_adc_dataframe = dyndist_adc_dataframe.drop(index_to_delete).reset_index(drop=True)
# Sanity check: rows still flagged with State == True.
dyndist_adc_dataframe[dyndist_adc_dataframe.State == True][["ProxID","fid","State"]]

Unnamed: 0,ProxID,fid,State


In [37]:
# Flag the t2_tse_sag rows of the findings that have to be removed.
dataframe = t2_tse_sag_dataframe
index_to_delete = []
for finding in list_of_elements_tobe_deleted:
    ProxID,fid = finding.split(" - ")
    dataframe2 = dataframe[(dataframe.ProxID == ProxID) & (dataframe.fid == int(fid)) ]
    # A finding can own several rows; all of them are collected. Taking only
    # the first match left the remaining rows of the finding in the table.
    for index in dataframe2.index:
        index_to_delete.append(index)
        print(index)
    dataframe.loc[dataframe2.index, "State"] = True

128
239


In [38]:
# Drop the flagged rows and renumber the rest 0..n-1. reset_index returns a
# new frame, so it is chained into the assignment instead of being called bare.
t2_tse_sag_dataframe = t2_tse_sag_dataframe.drop(index_to_delete).reset_index(drop=True)
# Sanity check: rows still flagged with State == True.
t2_tse_sag_dataframe[t2_tse_sag_dataframe.State == True][["ProxID","fid","State"]]

Unnamed: 0,ProxID,fid,State


In [39]:
# Flag the t2_tse_tra rows of the findings that have to be removed.
dataframe = t2_tse_tra_dataframe
index_to_delete = []
for finding in list_of_elements_tobe_deleted:
    ProxID,fid = finding.split(" - ")
    dataframe2 = dataframe[(dataframe.ProxID == ProxID) & (dataframe.fid == int(fid)) ]
    # A finding can own several rows; all of them are collected. Taking only
    # the first match left the remaining rows of the finding in the table.
    for index in dataframe2.index:
        print(index)
        index_to_delete.append(index)
    dataframe.loc[dataframe2.index, "State"] = True

136
263
24
239
343


In [40]:
# Drop the flagged rows and renumber the rest 0..n-1. reset_index returns a
# new frame, so it is chained into the assignment instead of being called bare.
t2_tse_tra_dataframe = t2_tse_tra_dataframe.drop(index_to_delete).reset_index(drop=True)
# Sanity check: rows still flagged with State == True.
t2_tse_tra_dataframe[t2_tse_tra_dataframe.State == True][["ProxID","fid","State"]]

Unnamed: 0,ProxID,fid,State


In [41]:
# Row counts after the clean-up, in the order Ktrans, t2_tse_sag, t2_tse_tra, ADC.
for cleaned_table in (
    training_ktrans_imgs_dataframe,
    t2_tse_sag_dataframe,
    t2_tse_tra_dataframe,
    dyndist_adc_dataframe,
):
    print(len(cleaned_table))

334
361
410
342


In [42]:
# Persist the cleaned tables. to_pickle does not create missing directories,
# so the output folder is created first.
os.makedirs("pickle", exist_ok=True)
dyndist_adc_dataframe.to_pickle("pickle/dyndist_adc_dataframe.pickle")
training_ktrans_imgs_dataframe.to_pickle("pickle/ktrans_dataframe.pickle")
t2_tse_sag_dataframe.to_pickle("pickle/t2_tse_sag_dataframe.pickle")



In [43]:
# Persist the cleaned t2_tse_tra table; the output folder is created if missing
# because to_pickle does not create directories.
os.makedirs("pickle", exist_ok=True)
t2_tse_tra_dataframe.to_pickle("pickle/t2_tse_tra_dataframe.pickle")