In [1]:
from batchgenerators.utilities.file_and_folder_operations import *
import nibabel as nib
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import center_of_mass, binary_dilation
from scipy.spatial import distance
from skimage.measure import label  
import pandas as pd
import os

In [2]:
def main_extract_save(folder):
    dst_sn_m, dst_mp_sn, dst_msn_sn = index_extract(folder)
    if dst_sn_m is not None:
        result_array = np.array((int(folder.split('/')[-1].split('_')[0]), dst_sn_m, dst_mp_sn, dst_msn_sn))

        save_path = folder + '/result.npy'
        np.save(save_path, result_array)
        print(save_path)

In [3]:
def index_extract(folder):
    seg_file = folder+'/segmentation_sinus_r.nii.gz'
    seg = nib.load(seg_file)
    seg_data = seg.get_fdata()

    sinus_data = np.zeros(seg_data.shape)
    sinus_data[seg_data==4]=1
    
    # parenchyma island extract
    kid_data = np.zeros(seg_data.shape)
    kid_data[seg_data==1]=1

    kid_split = label(kid_data)
    assert(len(np.unique(kid_split))==3)

    kid_data_1 = np.zeros(seg_data.shape)
    kid_data_1[kid_split==1]=1
    kid_data_2 = np.zeros(seg_data.shape)
    kid_data_2[kid_split==2]=1

    # sinus island extract
    sinus_split = label(sinus_data)
    assert(len(np.unique(sinus_split))==3)

    sinus_data_1 = np.zeros(sinus_data.shape)
    sinus_data_1[sinus_split==1]=1
    sinus_data_2 = np.zeros(sinus_data.shape)
    sinus_data_2[sinus_split==2]=1

    # mass extract
    mass_data = np.zeros(seg_data.shape)
    mass_data[seg_data==2]=1
    #assert(len(np.unique(mass_data))==2)
    if len(np.unique(mass_data))!=2:
        return None, None, None

    # distance
    kid_data_1_center = center_of_mass(kid_data_1)
    kid_data_2_center = center_of_mass(kid_data_2)
    sinus_data_1_center = center_of_mass(sinus_data_1)
    sinus_data_2_center = center_of_mass(sinus_data_2)
    mass_center = center_of_mass(mass_data)

    dst_kd1_m = distance.euclidean(kid_data_1_center, mass_center)
    dst_kd2_m = distance.euclidean(kid_data_2_center, mass_center)
    dst_sn1_m = distance.euclidean(sinus_data_1_center, mass_center)
    dst_sn2_m = distance.euclidean(sinus_data_2_center, mass_center)

    if dst_kd1_m < dst_kd2_m:
        main_kid_data = kid_data_1.copy()
    else:
        main_kid_data = kid_data_2.copy()

    if dst_sn1_m < dst_sn2_m:
        main_sinus_data = sinus_data_1.copy()
    else:
        main_sinus_data = sinus_data_2.copy()
        
    dst_sn_m, dst_mp_sn, dst_msn_sn = distance_extract(main_sinus_data, main_kid_data, mass_data, seg.affine)
    
    return dst_sn_m, dst_mp_sn, dst_msn_sn

In [4]:
def distance_extract(main_sinus_data, main_kid_data, mass_data, affine):
    # 1. mass_center - sinus_center distance
    sinus_center = center_of_mass(main_sinus_data)
    mass_center = center_of_mass(mass_data)
    dst_sn_m = dist_3d(sinus_center, mass_center, affine)
    
    # 2. mass_parenchyma_surface - sinus_center distance (3D)
    mass_dil_data = binary_dilation(mass_data)
    x,y,z = np.where((mass_dil_data==1) & (main_kid_data==1))
    dst_mp_sn=0
    if len(x)!=0:
        for a,b,c in zip(x,y,z):
            dst_mp_sn += 1/dist_3d(sinus_center, (a,b,c), affine)
            
    # 3. mass_sinus_surface - sinus_center distance (3D)
    x,y,z = np.where((mass_dil_data==1) & (main_sinus_data==1))
    dst_msn_sn=0
    if len(x)!=0:
        for a,b,c in zip(x,y,z):
            dst_msn_sn += 1/dist_3d(sinus_center, (a,b,c), affine) 
            
    return dst_sn_m, dst_mp_sn, dst_msn_sn

In [5]:
def dist_3d(first, second, affine):
    # first,second : 3d point
    # affine : nib-> .affine
    dst = distance.euclidean(first, second)
    dst *= affine[0][0]
    return abs(dst)

In [47]:
#data_path =  '/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_mask'
data_path =  '/home/ldh/1_Tindex/data/T_Index_nii'
folders = subfolders(data_path)

In [7]:
from multiprocessing import Pool
p = Pool(4)
p.map(main_extract_save, folders)
p.close()
p.join()

/home/ldh/1_Tindex/data/T_Index_nii/001/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/023/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/003/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/041/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/061/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/063/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/005/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/026/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/064/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/008/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/044/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/029/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/010/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/048/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/032/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/067/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/033/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/016/result.npy
/home/ldh/1_Tindex/data/T_Index_nii/069/result.npy
/home/ldh/1_Tindex/data/T_Index

# Test

In [6]:
data_path =  '/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_pred'
folders = subfolders(data_path)

In [8]:
main_extract_save('/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_pred/376_4303442_170904_preop')

In [9]:
index_extract('/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_pred/376_4303442_170904_preop')

(None, None, None)

In [13]:
folder = '/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_pred/313_7074817_151218_preop'
seg_file = folder+'/segmentation_sinus_r.nii.gz'
seg = nib.load(seg_file)
seg_data = seg.get_fdata()

sinus_data = np.zeros(seg_data.shape)
sinus_data[seg_data==4]=1

# parenchyma island extract
kid_data = np.zeros(seg_data.shape)
kid_data[seg_data==1]=1

kid_split = label(kid_data)
assert(len(np.unique(kid_split))==3)

kid_data_1 = np.zeros(seg_data.shape)
kid_data_1[kid_split==1]=1
kid_data_2 = np.zeros(seg_data.shape)
kid_data_2[kid_split==2]=1

# sinus island extract
sinus_split = label(sinus_data)
assert(len(np.unique(sinus_split))==3)

sinus_data_1 = np.zeros(sinus_data.shape)
sinus_data_1[sinus_split==1]=1
sinus_data_2 = np.zeros(sinus_data.shape)
sinus_data_2[sinus_split==2]=1

# mass extract
mass_data = np.zeros(seg_data.shape)
mass_data[seg_data==2]=1

In [None]:
data_path =  '/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0'
folders = subfolders(data_path)
for f in folders:
    try:
        main_extract_save(f)
    except:
        print(f)

/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/001_2627046_211022_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_211022_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_211022_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_220325_CMP_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_220325_NP_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/003_2597321_211217_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/003_2597321_211217_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/004_10506248_211220_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/004_10506248_211220_NP_preop_HK
/home/ldh

In [11]:
from multiprocessing import Pool
p = Pool(4)
p.map(main_extract_save, folders)
p.close()
p.join()

/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_220325_CMP_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_211022_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/003_2597321_211217_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/001_2627046_211022_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_220325_NP_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/002_3685594_211022_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/003_2597321_211217_NP_preop_HK/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/004_10506248_211220_CMP_preop_KO/result.npy
/home/ldh/1_Tindex/data/T_Index_nii_final_test/T_Index_extract_pred_0/004_10506248_220603_CMP_KO/result.npy
/ho

AssertionError: 

Process ForkPoolWorker-1:
Process ForkPoolWorker-4:
Process ForkPoolWorker-3:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ldh/anaconda3/envs/uk/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  

In [37]:
import pandas as pd
df = pd.read_excel('/home/ldh/1_Tindex/data/PNX7Y_MR_CT_labeling_20220629.xlsx', sheet_name='Master Sheet')

In [9]:
df

Unnamed: 0,연번(No.),Pt_name,Pt_number,preop eGFR,postopeGFR,eGFR1day,eGFR1month,eGFR3month,eGFR6month,eGFR1year,...,recur/mets 까지 f/u 기간,TheDate_of_Recurrence,TheDate_of_Death.1,fatal_event_For_Death,Unnamed: 103,Unnamed: 104,Unnamed: 105,right_vol,left_vol,total_vol
0,1,안장순,2605574,88.45,75.81,75.81,58.6,75.81,88.45,88.45,...,,2007-09-17 00:00:00,,,...,,,173729.878,157590.871,331320.749
1,2,이중기,3245232,84.07,62.11,52.65,84.07,75.31,84.07,94.93,...,,2007-10-25 00:00:00,,,...,,,137435.760,137802.213,275237.973
2,3,정명희,5281859,96.62,82.82,82.82,,82.82,82.82,92.05,...,,2007-11-30 00:00:00,,,...,,,58096.518,85430.133,143526.651
3,4,조경은,5269576,77.62,77.62,77.62,,,77.62,x,...,,,,,...,,,,,
4,5,곽병철,5301420,84.72,69.86,69.86,,84.72,84.72,84.72,...,,,,,...,,,74399.469,187159.241,261558.710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599,600,이도영,10239084,46\n2020-07-21 14:43:39,42,42,43\n2020-09-23 17:08:56,52\n2020-11-19 14:33:08,48\n2020-12-22 15:09:23,55\n2021-07-01 11:47:23,...,,,,,,,,,,
600,601,윤일준,10245256,52\n2020-07-29 13:35:01,57,81,58\n2020-09-18 08:33:47,63\n2020-11-03 12:12:11,56\n2020-12-30 08:45:50,57\n2021-06-21 14:09:03,...,,,,,,,,,,
601,602,변동훈,10246173,88\n2020-08-05 10:22:40,74,90,85\n2020-09-18 09:35:36,x,81\n2021-01-13 15:05:31,81\n2021-07-19 11:27:26,...,,,,,,,,,,
602,603,김동식,10265817,90\n2020-09-15 13:22:32,77,90,x,90\n2020-12-17 15:14:25,x,x,...,,,,,,,,,,


In [10]:
t_index = []
data_path = '/home/ldh/1_Tindex/data/T_Index_nii'
for nb in df['연번(No.)'].values:
    result_path=None
    for d in subfolders(data_path, join=False):
        if d.startswith(str(nb).zfill(3)):
            result_path = data_path+'/'+d+'/result.npy'
    try:
        result_array = np.load(result_path)
        t_index.append(result_array)
    except:
        t_index.append([nb,None,None,None])

In [11]:
dict_value = {
    "No" : df['연번(No.)'].values,
    "Pt_number" : df['Pt_number'].values,
    "WIT" : df['WIT'].values,
    "EBL" : df['EBL'].values,
    "Total Op time" : df['Total Op time'].values,
    "Op_date" : df['Op_date'].values,
    "Discharge_date" : df['Discharge_date'].values,
    "Preop_cr" : df['Preop_cr'].values,
    "Postop_Cr_1D" : df['Postop_Cr_1D'].values,
    "PADUA_Score" : df['PADUA_Score'].values,
    "RENAL_Score" : df['RENAL_Score'].values,
    "x_C_index" : df['x_C_index'].values,
    "y_C_index" : df['y_C_index'].values,
    "m_sn" : np.array(t_index)[:,1],
    "mp_sn" : np.array(t_index)[:,2],
    "ms_sn" : np.array(t_index)[:,3],
}

In [13]:
new_df.to_csv('/home/ldh/1_Tindex/data/T_Index_Extract_train.csv', index=False)

In [38]:
t_index = []
t_index_ai = []
data_path = '/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_mask'
data_ai_path = '/home/ldh/1_Tindex/data/T_Index_nii_test/T_Index_extract_pred'
for nb in df['연번(No.)'].values:
    result_path=None
    result_ai_path=None
    for d in subfolders(data_path, join=False):
        if d.startswith(str(nb).zfill(3)):
            result_path = data_path+'/'+d+'/result.npy'
            result_ai_path = data_ai_path+'/'+d+'/result.npy'
    try:
        result_array = np.load(result_path)
        t_index.append(result_array)
    except:
        t_index.append([nb,None,None,None])
        
    try:
        result_ai_array = np.load(result_ai_path)
        t_index_ai.append(result_ai_array)
    except:
        t_index_ai.append([nb,None,None,None])

In [39]:
dict_value = {
    "No" : df['연번(No.)'].values,
    "Pt_number" : df['Pt_number'].values,
    "WIT" : df['WIT'].values,
    "EBL" : df['EBL'].values,
    "Total Op time" : df['Total Op time'].values,
    "Op_date" : df['Op_date'].values,
    "Discharge_date" : df['Discharge_date'].values,
    "Preop_cr" : df['Preop_cr'].values,
    "Postop_Cr_1D" : df['Postop_Cr_1D'].values,
    "PADUA_Score" : df['PADUA_Score'].values,
    "RENAL_Score" : df['RENAL_Score'].values,
    "x_C_index" : df['x_C_index'].values,
    "y_C_index" : df['y_C_index'].values,
    "m_sn" : np.array(t_index)[:,1],
    "mp_sn" : np.array(t_index)[:,2],
    "ms_sn" : np.array(t_index)[:,3],
    "m_sn_ai" : np.array(t_index_ai)[:,1],
    "mp_sn_ai" : np.array(t_index_ai)[:,2],
    "ms_sn_ai" : np.array(t_index_ai)[:,3]
}

In [40]:
new_df = pd.DataFrame(dict_value)

In [41]:
new_df.to_csv('/home/ldh/1_Tindex/data/T_Index_Extract.csv', index=False)