In [None]:
# internal imports
from wsi_core.WholeSlideImage import WholeSlideImage 
from wsi_core.wsi_utils import StitchPatches
from wsi_core.batch_process_utils import initialize_df
# other imports
import os
import numpy as np
import time
import argparse
import pdb
import pandas as pd

def stitching(file_path, downscale = 64):
    """Stitch the patches stored at ``file_path`` into one image and time the call.

    Args:
        file_path: path handed to ``StitchPatches`` (the caller passes the
            slide's ``.h5`` patch file).
        downscale: downscale factor forwarded to ``StitchPatches``.

    Returns:
        tuple: ``(heatmap, total_time)`` where ``heatmap`` is whatever
        ``StitchPatches`` returns and ``total_time`` is the wall-clock
        duration of that call in seconds.
    """
    # The body previously mixed a space-indented line with tab-indented lines,
    # which Python 3 rejects; it is now indented consistently.
    start = time.time()
    heatmap = StitchPatches(file_path, downscale=downscale, bg_color=(0,0,0), alpha=-1, draw_grid=False)
    total_time = time.time() - start

    return heatmap, total_time

def segment(WSI_object, seg_params, filter_params):
    """Run tissue segmentation on ``WSI_object`` and measure how long it takes.

    Args:
        WSI_object: object exposing ``segmentTissue``.
        seg_params (dict): expanded into keyword arguments of ``segmentTissue``.
        filter_params: passed as the ``filter_params`` keyword argument.

    Returns:
        tuple: ``(WSI_object, seg_time_elapsed)`` - the same object that was
        passed in and the elapsed wall-clock time in seconds.
    """
    timer_origin = time.time()
    WSI_object.segmentTissue(**seg_params, filter_params=filter_params)
    elapsed = time.time() - timer_origin
    return WSI_object, elapsed

def patching(WSI_object, **kwargs):
    """Create the patch bag for ``WSI_object`` and measure how long it takes.

    Args:
        WSI_object: object exposing ``createPatches_bag_hdf5``.
        **kwargs: forwarded unchanged to ``createPatches_bag_hdf5``;
            ``save_coord=True`` is always added.

    Returns:
        tuple: ``(file_path, patch_time_elapsed)`` - the value returned by
        ``createPatches_bag_hdf5`` and the elapsed wall-clock time in seconds.
    """
    timer_origin = time.time()
    result_path = WSI_object.createPatches_bag_hdf5(**kwargs, save_coord=True)
    elapsed = time.time() - timer_origin
    return result_path, elapsed

def _resolve_level(wsi_object, level):
    """Return ``level`` unchanged when non-negative, otherwise pick a default level."""
    if level >= 0:
        return level
    # Single-level slides have no alternative to level 0.
    if len(wsi_object.level_dim) == 1:
        return 0
    return wsi_object.getOpenSlide().get_best_level_for_downsample(64)


def _parse_contour_ids(raw_ids):
    """Turn a 'keep_ids'/'exclude_ids' value into a sequence of ints ([] for 'none'/empty)."""
    # Parse the str() form so a numeric table cell (e.g. a single id read from
    # a CSV as an int) does not fail on a missing ``.split`` attribute.
    ids_text = str(raw_ids)
    if ids_text == 'none' or len(ids_text) == 0:
        return []
    return np.array(ids_text.split(',')).astype(int)


def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                  patch_size = 256, step_size = 256, custom_downsample=1,
                  seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none'},
                  filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8 },
                  vis_params = {'vis_level': -1, 'line_thickness': 250},
                  patch_params = {'white_thresh': 5, 'black_thresh': 40, 'use_padding': True, 'contour_fn': 'four_pt'},
                  patch_level = 0,
                  use_default_params = False,
                  seg = False, save_mask = True,
                  stitch= False,
                  patch = False, auto_skip=True, process_list = None):
    """Segment, patch and stitch every slide flagged for processing.

    The work list is built with ``initialize_df`` from the files found in
    ``source`` or, when ``process_list`` is given, from that CSV. The table is
    written to ``<save_dir>/process_list_autogen.csv`` at the start of every
    iteration and once more at the end.

    Args:
        source: directory whose regular files are treated as slides.
        save_dir: directory receiving ``process_list_autogen.csv``.
        patch_save_dir: directory of the per-slide ``.h5`` patch files.
        mask_save_dir: directory receiving ``<slide_id>.png`` masks.
        stitch_save_dir: directory receiving ``<slide_id>.png`` stitched images.
        patch_size, step_size, custom_downsample, patch_level: forwarded to
            ``patching`` together with the patch parameters.
        seg_params, filter_params, vis_params, patch_params (dict): default
            parameters. The default dict objects are shared between calls but
            are only read or copied here, never mutated.
        use_default_params: when true, use the dicts above for every slide
            instead of the per-slide values stored in the table.
        seg, save_mask, stitch, patch: switches for the individual stages.
        auto_skip: skip slides whose ``.h5`` file already exists in
            ``patch_save_dir``.
        process_list: optional path of a CSV describing the slides to process.

    Returns:
        tuple: ``(seg_times, patch_times)`` - the accumulated stage times
        divided by the number of slides in the work list (0.0 when the work
        list is empty).
    """
    slides = sorted(os.listdir(source))
    slides = [slide for slide in slides if os.path.isfile(os.path.join(source, slide))]

    if process_list is None:
        df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params, save_patches=True)
    else:
        df = pd.read_csv(process_list)
        df = initialize_df(df, seg_params, filter_params, vis_params, patch_params, save_patches=True)

    # Only rows still flagged with process == 1 are handled.
    process_stack = df[df['process'] == 1]

    total = len(process_stack)
    autogen_csv = os.path.join(save_dir, 'process_list_autogen.csv')
    seg_times = 0.
    patch_times = 0.
    stitch_times = 0.

    for i, idx in enumerate(process_stack.index):
        # Checkpoint the table before touching the next slide.
        df.to_csv(autogen_csv, index=False)
        slide = process_stack.loc[idx, 'slide_id']
        print("\n\nprogress: {:.2f}, {}/{}".format(i/total, i, total))
        print('processing {}'.format(slide))

        df.loc[idx, 'process'] = 0
        slide_id, _ = os.path.splitext(slide)

        if auto_skip and os.path.isfile(os.path.join(patch_save_dir, slide_id + '.h5')):
            print('{} already exist in destination location, skipped'.format(slide_id))
            df.loc[idx, 'status'] = 'already_exist'
            continue

        # Initialize WSI
        full_path = os.path.join(source, slide)
        WSI_object = WholeSlideImage(full_path)

        if use_default_params:
            current_vis_params = vis_params.copy()
            current_filter_params = filter_params.copy()
            current_seg_params = seg_params.copy()
            current_patch_params = patch_params.copy()
        else:
            # Per-slide values come from the table row; the default dicts only name the columns.
            current_vis_params = {key: df.loc[idx, key] for key in vis_params}
            current_filter_params = {key: df.loc[idx, key] for key in filter_params}
            current_seg_params = {key: df.loc[idx, key] for key in seg_params}
            current_patch_params = {key: df.loc[idx, key] for key in patch_params}

        # Negative levels mean "choose automatically".
        current_vis_params['vis_level'] = _resolve_level(WSI_object, current_vis_params['vis_level'])
        current_seg_params['seg_level'] = _resolve_level(WSI_object, current_seg_params['seg_level'])

        current_seg_params['keep_ids'] = _parse_contour_ids(current_seg_params['keep_ids'])
        current_seg_params['exclude_ids'] = _parse_contour_ids(current_seg_params['exclude_ids'])

        w, h = WSI_object.level_dim[current_seg_params['seg_level']]
        if w * h > 1e8:
            print('level_dim {} x {} is likely too large for successful segmentation, aborting'.format(w, h))
            df.loc[idx, 'status'] = 'failed_seg'
            continue

        if not process_list:
            # Record the resolved levels in the auto-generated table.
            df.loc[idx, 'vis_level'] = current_vis_params['vis_level']
            df.loc[idx, 'seg_level'] = current_seg_params['seg_level']

        # -1 marks a stage that did not run for this slide.
        seg_time_elapsed = -1
        if seg:
            WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params)

        if save_mask:
            mask_image = WSI_object.visWSI(**current_vis_params)
            mask_path = os.path.join(mask_save_dir, slide_id+'.png')
            mask_image.save(mask_path)

        patch_time_elapsed = -1 # Default time
        if patch:
            current_patch_params.update({'patch_level': patch_level, 'patch_size': patch_size, 'step_size': step_size,
                                         'save_path': patch_save_dir, 'custom_downsample': custom_downsample})
            _, patch_time_elapsed = patching(WSI_object = WSI_object, **current_patch_params)

        stitch_time_elapsed = -1
        if stitch:
            file_path = os.path.join(patch_save_dir, slide_id+'.h5')
            heatmap, stitch_time_elapsed = stitching(file_path, downscale=64)
            stitch_path = os.path.join(stitch_save_dir, slide_id+'.png')
            heatmap.save(stitch_path)

        print("segmentation took {} seconds".format(seg_time_elapsed))
        print("patching took {} seconds".format(patch_time_elapsed))
        print("stitching took {} seconds".format(stitch_time_elapsed))
        df.loc[idx, 'status'] = 'processed'

        # NOTE(review): the -1 placeholders of disabled stages are summed too,
        # and skipped slides still count in ``total`` - confirm this is intended.
        seg_times += seg_time_elapsed
        patch_times += patch_time_elapsed
        stitch_times += stitch_time_elapsed

    # An empty work list previously raised ZeroDivisionError here.
    if total > 0:
        seg_times /= total
        patch_times /= total
        stitch_times /= total

    df.to_csv(autogen_csv, index=False)
    print("average segmentation time in s per slide: {}".format(seg_times))
    print("average patching time in s per slide: {}".format(patch_times))
    print("average stiching time in s per slide: {}".format(stitch_times))

    return seg_times, patch_times

# Command-line interface of the segmentation / patching script.
# --source and --save_dir are required: the entry point joins paths onto
# save_dir and lists source, so omitting them used to end in a TypeError
# traceback instead of a usage message.
parser = argparse.ArgumentParser(description='seg and patch')
parser.add_argument('--source', type = str, required=True,
                    help='path to folder containing raw wsi image files')
parser.add_argument('--step_size', type = int, default=256,
                    help='step_size')
parser.add_argument('--patch_size', type = int, default=256,
                    help='patch_size')
parser.add_argument('--patch', default=False, action='store_true',
                    help='run the patching stage')
parser.add_argument('--seg', default=False, action='store_true',
                    help='run the segmentation stage')
parser.add_argument('--stitch', default=False, action='store_true',
                    help='run the stitching stage')
# Stored under ``args.no_auto_skip``: True (the default) keeps auto-skip on,
# passing the flag turns it off.
parser.add_argument('--no_auto_skip', default=True, action='store_false',
                    help='do not skip slides whose .h5 output already exists')
parser.add_argument('--save_dir', type = str, required=True,
                    help='directory to save processed data')
parser.add_argument('--preset', default=None, type=str,
                    help='predefined profile of default segmentation and filter parameters (.csv)')
parser.add_argument('--patch_level', type=int, default=0,
                    help='downsample level at which to patch')
parser.add_argument('--custom_downsample', type= int, choices=[1,2], default=1,
                    help='custom downscale when native downsample is not available (only tested w/ 2x downscale)')
parser.add_argument('--process_list',  type = str, default=None,
                    help='name of list of images to process with parameters (.csv)')

if __name__ == '__main__':
    # Script entry point: parse the CLI, prepare the output folders, load an
    # optional preset and hand everything to seg_and_patch.
    args = parser.parse_args()

    # All outputs live in fixed sub-folders of save_dir.
    patch_save_dir = os.path.join(args.save_dir, 'patches')
    mask_save_dir = os.path.join(args.save_dir, 'masks')
    stitch_save_dir = os.path.join(args.save_dir, 'stitches')

    # The process list is looked up relative to save_dir.
    process_list = os.path.join(args.save_dir, args.process_list) if args.process_list else None

    print('source: ', args.source)
    print('patch_save_dir: ', patch_save_dir)
    print('mask_save_dir: ', mask_save_dir)
    print('stitch_save_dir: ', stitch_save_dir)

    directories = {'source': args.source,
                   'save_dir': args.save_dir,
                   'patch_save_dir': patch_save_dir,
                   'mask_save_dir' : mask_save_dir,
                   'stitch_save_dir': stitch_save_dir}

    for name, location in directories.items():
        print("{} : {}".format(name, location))
        # The source folder is input only; every other folder is created on demand.
        if name != 'source':
            os.makedirs(location, exist_ok=True)

    seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none'}
    filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8 }
    vis_params = {'vis_level': -1, 'line_thickness': 250}
    patch_params = {'white_thresh': 5, 'black_thresh': 40, 'use_padding': True, 'contour_fn': 'four_pt'}

    if args.preset:
        # Row 0 of the preset CSV overrides every built-in default.
        preset_df = pd.read_csv(os.path.join('presets', args.preset))
        for group in (seg_params, filter_params, vis_params, patch_params):
            for key in group:
                group[key] = preset_df.loc[0, key]

    parameters = {'seg_params': seg_params,
                  'filter_params': filter_params,
                  'patch_params': patch_params,
                  'vis_params': vis_params}

    print(parameters)

    seg_times, patch_times = seg_and_patch(**directories, **parameters,
                                           patch_size = args.patch_size, step_size=args.step_size,
                                           seg = args.seg,  use_default_params=False, save_mask = True,
                                           stitch= args.stitch, custom_downsample = args.custom_downsample,
                                           patch_level=args.patch_level, patch = args.patch,
                                           process_list = process_list, auto_skip=args.no_auto_skip)

In [None]:
# internal imports
from wsi_core.WholeSlideImage import WholeSlideImage 
from wsi_core.wsi_utils import StitchPatches
from wsi_core.batch_process_utils import initialize_df
# other imports
import os
import numpy as np
import time
import argparse
import pdb
import pandas as pd

def stitching(file_path, downscale = 64):
    """Stitch the patches stored at ``file_path`` into one image and time the call.

    Args:
        file_path: path handed to ``StitchPatches``.
        downscale: downscale factor forwarded to ``StitchPatches``.

    Returns:
        tuple: ``(heatmap, total_time)`` - the object returned by
        ``StitchPatches`` and the duration of that call in seconds.
    """
    # Indentation is now consistent; the space/tab mix of the original body
    # is rejected by Python 3.
    start = time.time()
    heatmap = StitchPatches(file_path, downscale=downscale, bg_color=(0,0,0), alpha=-1, draw_grid=False)
    total_time = time.time() - start

    return heatmap, total_time

def segment(WSI_object, seg_params, filter_params):
    """Segment the tissue of ``WSI_object`` and time the operation.

    Args:
        WSI_object: object exposing ``segmentTissue``.
        seg_params (dict): expanded into keyword arguments of ``segmentTissue``.
        filter_params: passed as the ``filter_params`` keyword argument.

    Returns:
        tuple: ``(WSI_object, seg_time_elapsed)`` with the elapsed time in seconds.
    """
    tick = time.time()
    WSI_object.segmentTissue(**seg_params, filter_params=filter_params)
    duration = time.time() - tick
    return WSI_object, duration

def patching(WSI_object, **kwargs):
    """Create the patch bag of ``WSI_object`` and time the operation.

    Args:
        WSI_object: object exposing ``createPatches_bag_hdf5``.
        **kwargs: forwarded to ``createPatches_bag_hdf5`` with
            ``save_coord=True`` added.

    Returns:
        tuple: ``(file_path, patch_time_elapsed)`` - the method's return value
        and the elapsed time in seconds.
    """
    tick = time.time()
    produced_path = WSI_object.createPatches_bag_hdf5(**kwargs, save_coord=True)
    duration = time.time() - tick
    return produced_path, duration

def _resolve_level(wsi_object, level):
    """Return ``level`` unchanged when non-negative, otherwise pick a default level."""
    if level >= 0:
        return level
    # Single-level slides have no alternative to level 0.
    if len(wsi_object.level_dim) == 1:
        return 0
    return wsi_object.getOpenSlide().get_best_level_for_downsample(64)


def _parse_contour_ids(raw_ids):
    """Turn a 'keep_ids'/'exclude_ids' value into a sequence of ints ([] for 'none'/empty)."""
    # Parse the str() form so a numeric table cell does not fail on a missing
    # ``.split`` attribute.
    ids_text = str(raw_ids)
    if ids_text == 'none' or len(ids_text) == 0:
        return []
    return np.array(ids_text.split(',')).astype(int)


def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                  patch_size = 256, step_size = 256, custom_downsample=1,
                  seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none'},
                  filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8 },
                  vis_params = {'vis_level': -1, 'line_thickness': 250},
                  patch_params = {'white_thresh': 5, 'black_thresh': 40, 'use_padding': True, 'contour_fn': 'four_pt'},
                  patch_level = 0,
                  use_default_params = False,
                  seg = False, save_mask = True,
                  stitch= False,
                  patch = False, auto_skip=True, process_list = None):
    """Segment, patch and stitch every slide flagged for processing.

    This copy of the function ends after the per-slide loop: the stage times
    are accumulated but neither averaged nor returned, and the table is only
    written at the start of each iteration.

    Args:
        source: directory whose regular files are treated as slides.
        save_dir: directory receiving ``process_list_autogen.csv``.
        patch_save_dir: directory of the per-slide ``.h5`` patch files.
        mask_save_dir: directory receiving ``<slide_id>.png`` masks.
        stitch_save_dir: directory receiving ``<slide_id>.png`` stitched images.
        patch_size, step_size, custom_downsample, patch_level: forwarded to
            ``patching`` together with the patch parameters.
        seg_params, filter_params, vis_params, patch_params (dict): default
            parameters; only read or copied here, never mutated.
        use_default_params: use the dicts above instead of per-slide table values.
        seg, save_mask, stitch, patch: switches for the individual stages.
        auto_skip: skip slides whose ``.h5`` file already exists.
        process_list: optional path of a CSV describing the slides to process.

    Returns:
        None.
    """
    slides = sorted(os.listdir(source))
    slides = [slide for slide in slides if os.path.isfile(os.path.join(source, slide))]

    if process_list is None:
        df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params, save_patches=True)
    else:
        df = pd.read_csv(process_list)
        df = initialize_df(df, seg_params, filter_params, vis_params, patch_params, save_patches=True)

    # Only rows still flagged with process == 1 are handled.
    process_stack = df[df['process'] == 1]

    total = len(process_stack)
    autogen_csv = os.path.join(save_dir, 'process_list_autogen.csv')
    seg_times = 0.
    patch_times = 0.
    stitch_times = 0.

    for i, idx in enumerate(process_stack.index):
        # Checkpoint the table before touching the next slide.
        df.to_csv(autogen_csv, index=False)
        slide = process_stack.loc[idx, 'slide_id']
        print("\n\nprogress: {:.2f}, {}/{}".format(i/total, i, total))
        print('processing {}'.format(slide))

        df.loc[idx, 'process'] = 0
        slide_id, _ = os.path.splitext(slide)

        if auto_skip and os.path.isfile(os.path.join(patch_save_dir, slide_id + '.h5')):
            print('{} already exist in destination location, skipped'.format(slide_id))
            df.loc[idx, 'status'] = 'already_exist'
            continue

        # Initialize WSI
        full_path = os.path.join(source, slide)
        WSI_object = WholeSlideImage(full_path)

        if use_default_params:
            current_vis_params = vis_params.copy()
            current_filter_params = filter_params.copy()
            current_seg_params = seg_params.copy()
            current_patch_params = patch_params.copy()
        else:
            # Per-slide values come from the table row; the default dicts only name the columns.
            current_vis_params = {key: df.loc[idx, key] for key in vis_params}
            current_filter_params = {key: df.loc[idx, key] for key in filter_params}
            current_seg_params = {key: df.loc[idx, key] for key in seg_params}
            current_patch_params = {key: df.loc[idx, key] for key in patch_params}

        # Negative levels mean "choose automatically".
        current_vis_params['vis_level'] = _resolve_level(WSI_object, current_vis_params['vis_level'])
        current_seg_params['seg_level'] = _resolve_level(WSI_object, current_seg_params['seg_level'])

        current_seg_params['keep_ids'] = _parse_contour_ids(current_seg_params['keep_ids'])
        current_seg_params['exclude_ids'] = _parse_contour_ids(current_seg_params['exclude_ids'])

        w, h = WSI_object.level_dim[current_seg_params['seg_level']]
        if w * h > 1e8:
            print('level_dim {} x {} is likely too large for successful segmentation, aborting'.format(w, h))
            df.loc[idx, 'status'] = 'failed_seg'
            continue

        if not process_list:
            # Record the resolved levels in the auto-generated table.
            df.loc[idx, 'vis_level'] = current_vis_params['vis_level']
            df.loc[idx, 'seg_level'] = current_seg_params['seg_level']

        # -1 marks a stage that did not run for this slide.
        seg_time_elapsed = -1
        if seg:
            WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params)

        if save_mask:
            mask_image = WSI_object.visWSI(**current_vis_params)
            mask_path = os.path.join(mask_save_dir, slide_id+'.png')
            mask_image.save(mask_path)

        patch_time_elapsed = -1 # Default time
        if patch:
            current_patch_params.update({'patch_level': patch_level, 'patch_size': patch_size, 'step_size': step_size,
                                         'save_path': patch_save_dir, 'custom_downsample': custom_downsample})
            _, patch_time_elapsed = patching(WSI_object = WSI_object, **current_patch_params)

        stitch_time_elapsed = -1
        if stitch:
            file_path = os.path.join(patch_save_dir, slide_id+'.h5')
            heatmap, stitch_time_elapsed = stitching(file_path, downscale=64)
            stitch_path = os.path.join(stitch_save_dir, slide_id+'.png')
            heatmap.save(stitch_path)

        print("segmentation took {} seconds".format(seg_time_elapsed))
        print("patching took {} seconds".format(patch_time_elapsed))
        print("stitching took {} seconds".format(stitch_time_elapsed))
        df.loc[idx, 'status'] = 'processed'

        seg_times += seg_time_elapsed
        patch_times += patch_time_elapsed
        stitch_times += stitch_time_elapsed


In [None]:
# internal imports
from wsi_core.WholeSlideImage import WholeSlideImage 
from wsi_core.wsi_utils import StitchPatches
from wsi_core.batch_process_utils import initialize_df
# other imports
import os
import numpy as np
import time
import argparse
import pdb
import pandas as pd

def stitching(file_path, downscale = 64):
    """Stitch the patches stored at ``file_path`` into one image and time the call.

    Args:
        file_path: path handed to ``StitchPatches``.
        downscale: downscale factor forwarded to ``StitchPatches``.

    Returns:
        tuple: ``(heatmap, total_time)`` - the object returned by
        ``StitchPatches`` and the duration of that call in seconds.
    """
    # The original body mixed spaces and tabs, which Python 3 refuses to
    # compile; every line now uses the same indentation.
    start = time.time()
    heatmap = StitchPatches(file_path, downscale=downscale, bg_color=(0,0,0), alpha=-1, draw_grid=False)
    total_time = time.time() - start

    return heatmap, total_time



In [None]:
from WholeSlideImage import WSI
from wsi_utils import StitchPatches
from batch_process_utils import initizalize_df

import os
import numpy as np
import time
import argparse
import pdb
import pandas as pd

def stitching(file_path, downscale = 64):
    """Call ``StitchPatches`` on ``file_path`` and measure how long the call takes.

    Args:
        file_path: path handed to ``StitchPatches``.
        downscale: downscale factor forwarded to ``StitchPatches``.

    Returns:
        tuple: ``(heatmap, total_time)`` - the object returned by
        ``StitchPatches`` and the duration of that call in seconds.
    """
    start = time.time()
    heatmap = StitchPatches(file_path, downscale = downscale, bg_color = (0,0,0), alpha = -1, draw_grid = False)
    # Was assigned to the misspelled name ``total_itime``, so the return below
    # raised NameError.
    total_time = time.time() - start

    return heatmap, total_time

# This function calls StitchPatches and then computes how long the call took.
# StitchPatches itself is imported from wsi_utils.

In [None]:
def segment(WSI_object, seg_params, filter_params):
    """Time a ``segmentTissue`` call on ``WSI_object``.

    Args:
        WSI_object: object exposing ``segmentTissue``.
        seg_params (dict): expanded into keyword arguments of ``segmentTissue``.
        filter_params: passed as the ``filter_params`` keyword argument.

    Returns:
        tuple: ``(WSI_object, seg_time_elapsed)`` with the elapsed time in seconds.
    """
    t0 = time.time()
    WSI_object.segmentTissue(**seg_params, filter_params=filter_params)
    t1 = time.time()
    return WSI_object, t1 - t0


In [None]:
def segment(WSI_object, seg_params, filter_params):
    """Run ``segmentTissue`` on ``WSI_object`` and measure how long it takes.

    Args:
        WSI_object: object exposing ``segmentTissue``.
        seg_params (dict): expanded into keyword arguments of ``segmentTissue``.
        filter_params: passed as the ``filter_params`` keyword argument.

    Returns:
        tuple: ``(WSI_object, seg_time_elapsed)`` with the elapsed time in seconds.
    """
    # Was written ``start+time = ...``, which is a syntax error.
    start_time = time.time()
    WSI_object.segmentTissue(**seg_params, filter_params = filter_params)

    seg_time_elapsed = time.time() - start_time
    return WSI_object, seg_time_elapsed


# Study notes:
# - The segmentation itself happens inside WSI_object.segmentTissue; this
#   wrapper only times that call.
# - ``**seg_params`` is not an address: it unpacks the seg_params dict into
#   keyword arguments (one argument per key), so seg_params is a dict rather
#   than an array or matrix.
# - The function finally reports the elapsed time next to the object.

In [None]:

def patching(WSI_object, **kwargs):
    """Time a ``createPatches_bag_hdf5`` call on ``WSI_object``.

    Args:
        WSI_object: object exposing ``createPatches_bag_hdf5``.
        **kwargs: forwarded to ``createPatches_bag_hdf5`` with
            ``save_coord=True`` added.

    Returns:
        tuple: ``(file_path, patch_time_elapsed)`` - the method's return value
        and the elapsed time in seconds.
    """
    t0 = time.time()
    bag_path = WSI_object.createPatches_bag_hdf5(**kwargs, save_coord=True)
    t1 = time.time()
    return bag_path, t1 - t0


In [1]:
def patching(WSI_object, **kwargs):
    """Create the patch bag of ``WSI_object`` and report how long it took.

    Args:
        WSI_object: object exposing ``createPatches_bag_hdf5``.
        **kwargs: forwarded to ``createPatches_bag_hdf5`` with
            ``save_coord=True`` added.

    Returns:
        tuple: ``(file_path, patch_time_elapsed)`` - the method's return value
        and the elapsed time in seconds.
    """
    # Open question from the study notes: what kind of object is WSI_object,
    # and how is the dataset designed around it?
    began = time.time()
    created = WSI_object.createPatches_bag_hdf5(**kwargs, save_coord = True)
    took = time.time() - began
    return created, took
# Note: ``kwargs`` does not collect results. It is the dict of keyword
# arguments the caller passed in, and it is forwarded unchanged to
# createPatches_bag_hdf5.

In [None]:

def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                  patch_size = 256, step_size = 256, custom_downsample=1,
                  seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none'},
                  filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8 },
                  vis_params = {'vis_level': -1, 'line_thickness': 250},
                  patch_params = {'white_thresh': 5, 'black_thresh': 40, 'use_padding': True, 'contour_fn': 'four_pt'},
                  patch_level = 0,
                  use_default_params = False,
                  seg = False, save_mask = True,
                  stitch= False,
                  patch = False, auto_skip=True, process_list = None):
    """Opening part of seg_and_patch: build the per-slide table ``df``.

    This excerpt lists the regular files in ``source`` and creates ``df`` with
    ``initialize_df``, seeded either with those file names or, when
    ``process_list`` is given, with the CSV read from that path. Nothing is
    returned here.
    """
    listing = sorted(os.listdir(source))
    slides = [entry for entry in listing if os.path.isfile(os.path.join(source, entry))]

    # A process list replaces the directory listing as the seed of the table.
    seed = slides if process_list is None else pd.read_csv(process_list)
    df = initialize_df(seed, seg_params, filter_params, vis_params, patch_params, save_patches=True)


In [None]:
def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                 patch_size = 256, step_size = 256, custom_downsample = 1,
                 seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh':7, 'close':4,'use_otsu':False,
                    'keep_ids': 'none', 'exclude_ids': 'none'},
                 filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8},
                 vis_params = {'vis_level':-1, 'line_thickness':250},
                 patch_params={'white_thresh':5, 'black_thresh':40, 'use_padding':True, 'contour_fn':'four_pt'},
                 patch_level=0,
                 use_default_params=False,
                 seg = False, save_mask=True,
                 stitch= False,
                 patch=False, auto_skip=True, process_list = None):
    """Opening part of seg_and_patch: build the per-slide table ``df``.

    Fixes applied to the retyped cell, which did not compile:
      * ``'seg_level': = -1`` -> ``'seg_level': -1``
      * ``'exclued_ids':none`` -> ``'exclude_ids': 'none'``
      * ``four_pt`` -> ``'four_pt'``
      * ``savedir`` -> ``save_dir``, matching the other copies of this function
      * ``initizalize_df(slies, ...)`` -> ``initialize_df(slides, ...)``

    The excerpt lists the regular files in ``source`` and creates ``df`` with
    ``initialize_df``, from the file names or, when ``process_list`` is given,
    from the CSV read from that path. Nothing is returned here.
    """
    slides = sorted(os.listdir(source))
    slides = [slide for slide in slides if os.path.isfile(os.path.join(source, slide))]

    if process_list is None:
        df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params, save_patches=True)

    else:
        df = pd.read_csv(process_list)
        df = initialize_df(df, seg_params, filter_params, vis_params, patch_params, save_patches=True)
        
#왜 df의 정의를 다르게 하는가? process_list가 무엇이기에? 그리고 
#어디에서 이런 함수를 정의해서 가져오는 것이지? 궁금하다 - 계속해서 알아가고 전체 체계를 숙지하도록 노력한다.

#일단, 하나의 함수를 정의하고자 하고, 여러 가지 모든 변수들을 가져와서 정의한 후에 이들을 가지고 slides, df를 만들어낸다. 이것이 첫 번째이다.
#각각의 슬라이드에 대한 정보를 중심으로 pandas df를 만들어내는 것으로 보인다. 그러나 process_list의 존재가 왜 필요한지가 의문이다.


In [None]:

	# Fragment of the seg_and_patch body: select the slides to work on and
	# start the per-slide loop.
	# Only rows still flagged with process == 1 are kept.
	mask = df['process'] == 1
	process_stack = df[mask]

	total = len(process_stack)
	# Running totals of the per-stage timings.
	seg_times = 0.
	patch_times = 0.
	stitch_times = 0.

	for i in range(total):
		# Write the table to save_dir at the start of every iteration.
		df.to_csv(os.path.join(save_dir, 'process_list_autogen.csv'), index=False)
		idx = process_stack.index[i]
		slide = process_stack.loc[idx, 'slide_id']
		print("\n\nprogress: {:.2f}, {}/{}".format(i/total, i, total))
		print('processing {}'.format(slide))
		
		# Clear the flag on this row of the full table.
		df.loc[idx, 'process'] = 0
		slide_id, _ = os.path.splitext(slide)

		# With auto_skip, a slide whose .h5 file already exists in
		# patch_save_dir is marked and not processed again.
		if auto_skip and os.path.isfile(os.path.join(patch_save_dir, slide_id + '.h5')):
			print('{} already exist in destination location, skipped'.format(slide_id))
			df.loc[idx, 'status'] = 'already_exist'
			continue



In [2]:
    # Keep only the rows of df whose 'process' column equals 1.
    mask = df['process'] == 1
    process_stack = df[mask]
    # Question: what exactly is happening here?
    # 1. Find out where process_list and the table built by initialize_df come from and what role they play.
    # The aim is to understand what runs within the overall flow.

NameError: name 'df' is not defined

In [None]:
import pandas as pd
import numpy as np
import pdb
	
def initialize_df(slides, seg_params, filter_params, vis_params, patch_params,
                  use_heatmap_args=False, save_patches=False):
    """Initiate a pandas DataFrame describing a list of slides to process.

    Args:
        slides (DataFrame or array-like): list of slide ids; if a DataFrame,
            the ids are read from its 'slide_id' column. A DataFrame is
            completed in place (missing columns inserted, NaN cells filled
            with the defaults) and the same object is returned.
        seg_params (dict): segmentation parameters.
        filter_params (dict): filter parameters.
        vis_params (dict): visualization parameters.
        patch_params (dict): patching parameters.
        use_heatmap_args (bool): whether to include heatmap arguments
            ('label' and the ROI coordinates 'x1', 'x2', 'y1', 'y2').
        save_patches (bool): whether to include the 'white_thresh' and
            'black_thresh' columns.

    Returns:
        DataFrame: one row per slide with 'process' set to 1, 'status' set to
        'tbp' and one column per parameter.
    """
    total = len(slides)
    if isinstance(slides, pd.DataFrame):
        slide_ids = slides.slide_id.values
    else:
        slide_ids = slides
    default_df_dict = {'slide_id': slide_ids, 'process': np.full((total), 1, dtype=np.uint8)}

    # initiate empty labels in case not provided
    if use_heatmap_args:
        default_df_dict.update({'label': np.full((total), -1)})

    default_df_dict.update({
        'status': np.full((total), 'tbp'),
        # seg params
        'seg_level': np.full((total), int(seg_params['seg_level']), dtype=np.int8),
        'sthresh': np.full((total), int(seg_params['sthresh']), dtype=np.uint8),
        'mthresh': np.full((total), int(seg_params['mthresh']), dtype=np.uint8),
        'close': np.full((total), int(seg_params['close']), dtype=np.uint32),
        'use_otsu': np.full((total), bool(seg_params['use_otsu']), dtype=bool),
        'keep_ids': np.full((total), seg_params['keep_ids']),
        'exclude_ids': np.full((total), seg_params['exclude_ids']),

        # filter params
        'a_t': np.full((total), int(filter_params['a_t']), dtype=np.float32),
        'a_h': np.full((total), int(filter_params['a_h']), dtype=np.float32),
        'max_n_holes': np.full((total), int(filter_params['max_n_holes']), dtype=np.uint32),

        # vis params
        'vis_level': np.full((total), int(vis_params['vis_level']), dtype=np.int8),
        'line_thickness': np.full((total), int(vis_params['line_thickness']), dtype=np.uint32),

        # patching params
        'use_padding': np.full((total), bool(patch_params['use_padding']), dtype=bool),
        'contour_fn': np.full((total), patch_params['contour_fn'])
        })

    if save_patches:
        default_df_dict.update({
            'white_thresh': np.full((total), int(patch_params['white_thresh']), dtype=np.uint8),
            'black_thresh': np.full((total), int(patch_params['black_thresh']), dtype=np.uint8)})

    if use_heatmap_args:
        # initiate empty x,y coordinates in case not provided.
        # ndarray.fill() works in place and returns None, so the previous
        # np.empty(...).fill(...) stored None; np.full builds real NaN columns.
        default_df_dict.update({key: np.full((total), np.nan) for key in ('x1', 'x2', 'y1', 'y2')})

    if isinstance(slides, pd.DataFrame):
        temp_copy = pd.DataFrame(default_df_dict) # temporary dataframe w/ default params
        # find key in provided df
        # if exist, fill empty fields w/ default values, else, insert the default values as a new column
        for key in default_df_dict.keys():
            if key in slides.columns:
                mask = slides[key].isna()
                slides.loc[mask, key] = temp_copy.loc[mask, key]
            else:
                slides.insert(len(slides.columns), key, default_df_dict[key])
    else:
        slides = pd.DataFrame(default_df_dict)

    return slides

In [None]:
import pandas as pd
import numpy as np
import pdb
	
'''
initiate a pandas df describing a list of slides to process
args:
	slides (df or array-like): 
		array-like structure containing list of slide ids, if df, these ids assumed to be
		stored under the 'slide_id' column
	seg_params (dict): segmentation paramters 
	filter_params (dict): filter parameters
	vis_params (dict): visualization paramters
	patch_params (dict): patching paramters
	use_heatmap_args (bool): whether to include heatmap arguments such as ROI coordinates
'''
def initialize_df(slides, seg_params, filter_params, vis_params, patch_params, 
	use_heatmap_args=False, save_patches=False):
	# First part of initialize_df only: this excerpt collects the slide ids
	# and starts the dict of default columns; it ends without returning it.

	total = len(slides)  # total number of slides
	if isinstance(slides, pd.DataFrame):  # check whether slides is a DataFrame
		slide_ids = slides.slide_id.values  # take the ids from the 'slide_id' column
	else:
		slide_ids = slides  # not a DataFrame: use the given sequence as is
	default_df_dict = {'slide_id': slide_ids, 'process': np.full((total), 1, dtype=np.uint8)}  # base columns: slide ids and a 'process' flag set to 1

	# initiate empty labels in case not provided
	if use_heatmap_args:  # check whether heatmap arguments are requested
		default_df_dict.update({'label': np.full((total), -1)})  # add a 'label' column filled with -1
	

In [None]:
import pandas as pd
import numpy as np
import pdb

def initialize_df(slides, seg_params, filter_params, vis_params, patch_params,
                 use_heatmap_args=False, save_patches = False):
    """Start the table of default columns describing the slides to process.

    Args:
        slides (DataFrame or array-like): list of slide ids; if a DataFrame,
            the ids are read from its 'slide_id' column.
        seg_params, filter_params, vis_params, patch_params: accepted for
            signature compatibility; not used in this first part.
        use_heatmap_args (bool): when true, a 'label' column filled with -1
            is added.
        save_patches (bool): not used in this first part.

    Returns:
        None. This cell covers only the beginning of the function: the dict
        of default columns is built but not yet turned into a DataFrame.
    """
    total = len(slides)  # total number of slides
    if isinstance(slides, pd.DataFrame):  # slides given as a DataFrame
        slide_ids = slides.slide_id.values
    else:  # slides given as a plain list of ids
        slide_ids = slides
    # Base columns of the table: the ids and a 'process' flag set to 1.
    # The dict was previously bound to the misspelled name ``defalut_dict``,
    # so the update below raised NameError.
    default_df_dict = {'slide_id':slide_ids, 'process':np.full((total), 1, dtype = np.uint8)}

    # initiate empty labels in case not provided
    if use_heatmap_args:
        # Was ``np.full((total)-1)``: the fill value -1 had been merged into the shape.
        default_df_dict.update({'label':np.full((total), -1)})
#결국에 경우가 나눠진다는거는
#언젠가 그걸 통합하는 상황에서의 추상화 또는 일들이 이뤄지게 되는구나.

In [None]:

	# Fragment of initialize_df: add one default-valued column per parameter,
	# each holding `total` copies of the configured value.
	default_df_dict.update({
		'status': np.full((total), 'tbp'),  # create the 'status' column, initialised to 'tbp'
		# seg params
		'seg_level': np.full((total), int(seg_params['seg_level']), dtype=np.int8),  # create and initialise the 'seg_level' column
		'sthresh': np.full((total), int(seg_params['sthresh']), dtype=np.uint8),  # create and initialise the 'sthresh' column
		'mthresh': np.full((total), int(seg_params['mthresh']), dtype=np.uint8),  # create and initialise the 'mthresh' column
		'close': np.full((total), int(seg_params['close']), dtype=np.uint32),  # create and initialise the 'close' column
		'use_otsu': np.full((total), bool(seg_params['use_otsu']), dtype=bool),  # create and initialise the 'use_otsu' column
		'keep_ids': np.full((total), seg_params['keep_ids']),  # create and initialise the 'keep_ids' column
		'exclude_ids': np.full((total), seg_params['exclude_ids']),  # create and initialise the 'exclude_ids' column
		
		# filter params
		'a_t': np.full((total), int(filter_params['a_t']), dtype=np.float32),  # create and initialise the 'a_t' column
		'a_h': np.full((total), int(filter_params['a_h']), dtype=np.float32),  # create and initialise the 'a_h' column
		'max_n_holes': np.full((total), int(filter_params['max_n_holes']), dtype=np.uint32),  # create and initialise the 'max_n_holes' column

		# vis params
		'vis_level': np.full((total), int(vis_params['vis_level']), dtype=np.int8),  # create and initialise the 'vis_level' column
		'line_thickness': np.full((total), int(vis_params['line_thickness']), dtype=np.uint32),  # create and initialise the 'line_thickness' column

		# patching params
		'use_padding': np.full((total), bool(patch_params['use_padding']), dtype=bool),  # create and initialise the 'use_padding' column
		'contour_fn': np.full((total), patch_params['contour_fn'])  # create and initialise the 'contour_fn' column
		})


In [None]:
# Abbreviated retyping of the column update: only the 'status' column is
# spelled out here.
default_df_dict.update({
    'status':np.full((total), 'tbp'),
# ... (remaining columns omitted in this snippet)
})

# 등등 여러 열들을 생성해준다. 각 열을 생성해서 이분들은 어디에 쓰려는 생각인 것이지? 보고 생각해보아야겠다.

# 모든 사람들이 생각보다 '생각'으로 이루어져 있다는 사실이 새삼스럽게 느껴지기도 한다. 모두가 같은 사람이고 이해할만한 생각으로
# 세상을 만들고 변형하고 지배해 나간다.


In [None]:

	if save_patches:  # 패치를 저장할지 여부를 확인합니다.
		default_df_dict.update({
			'white_thresh': np.full((total), int(patch_params['white_thresh']), dtype=np.uint8),  # 'white_thresh' 열을 생성하고 초기화합니다.
			'black_thresh': np.full((total), int(patch_params['black_thresh']), dtype=np.uint8)})  # 'black_thresh' 열을 생성하고 초기화합니다.

	if use_heatmap_args:  # 히트맵 인수를 사용할지 여부를 확인합니다.
		# initiate empty x,y coordinates in case not provided
		default_df_dict.update({'x1': np.empty((total)).fill(np.NaN),  # 'x1' 열을 생성하고 초기화합니다.
			'x2': np.empty((total)).fill(np.NaN),  # 'x2' 열을 생성하고 초기화합니다.
			'y1': np.empty((total)).fill(np.NaN),  # 'y1' 열을 생성하고 초기화합니다.
			'y2': np.empty((total)).fill(np.NaN)})  # 'y2' 열을 생성하고 초기화합니다.


	if isinstance(slides, pd.DataFrame):  # 슬라이드가 DataFrame인지 확인합니다.
		temp_copy = pd.DataFrame(default_df_dict)  # 데이터프레임의 복사본을 만듭니다.
		# find key in provided df
		# if exist, fill empty fields w/ default values, else, insert the default values as a new column
		for key in default_df_dict.keys():  # 각 열을 확인하면서
			if key in slides.columns:  # 만약 주어진 데이터프레임에 열이 이미 존재한다면
				mask = slides[key].isna()  # 누락된 값이 있는지 확인하고
				slides.loc[mask, key] = temp_copy.loc[mask, key]  # 누락된 값에 기본 값을 채웁니다.
			else:  # 주어진 데이터프레임에 열이 존재하지 않는 경우
				slides.insert(len(slides.columns), key, default_df_dict[key])  # 새로운 열을 추가하고 기본 값을 채웁니다.
	else:  # 슬라이드가 DataFrame이 아닌 경우
		slides = pd.DataFrame(default_df_dict)  # 새로운 데이터프레임을 생성하고 초기화합니다.
	
	return slides  # 초기화된 데이터프레임을 반환합니다.


In [None]:
    if save_patches:
        default_df_dict.update({
            'white_thresh':np.full((total), int(patch_params['white_thresh']),dtype=np.uint8),
            'black_thresh':np.full((total),int(patch_params['black_thresh']),dtype=np.uint8)
        })
    if use_heatmap_args:
        #initiate empty x, y coordinates in case not provided
        default_df_dict.update({'x1':np.empty((total)).fill(np.NaN),
                               'x2':np.empty((total)).fill(np.NaN),
                               'y1':np.empty((total)).fill(np.NaN),
                               'y2':np.empty((total)).fill(np.NaN)})
    if isinstance(slides, pd.DataFrame):
        temp_copy = pd.DataFrame(default_df_dict)
        #find key in proveded df
        for key in default_df_dict.keys():
            if key in slides.columns:
                mask = slides[key].isna()
                slides.loc[mask, key] = temp_copy.loc[mask, key]
            else:
                slides.insert(len(slides.columns),key,default_df_dict[key])
    else:
        slides=pd.DataFrame(default_df_dict)
    return slides

#이렇게 하나의 DF를 만들어내고 정의해낸다. 슬라이드로부터 정보를 얻어낸다.