1. wsi 경로 list로 만듦
2. wsi 읽음
3. patch로 쪼갬
4. patch들 h5로 저장

In [9]:
# internal imports
from wsi_core.WholeSlideImage import WholeSlideImage
from wsi_core.wsi_utils import StitchCoords
from wsi_core.batch_process_utils import initialize_df
# other imports
import os
import numpy as np
import time
import argparse
import pdb
import pandas as pd

import sys

In [10]:
def stitching(file_path, wsi_object, downscale = 64):
    start = time.time()
    heatmap = StitchCoords(file_path, wsi_object, downscale=downscale, bg_color=(0,0,0), alpha=-1, draw_grid=False)
    total_time = time.time() - start
    
    return heatmap, total_time

def segment(WSI_object, seg_params = None, filter_params = None, mask_file = None):
    ### Start Seg Timer
    start_time = time.time()
    # Use segmentation file
    if mask_file is not None:
        WSI_object.initSegmentation(mask_file)
    # Segment	
    else:
        WSI_object.segmentTissue(**seg_params, filter_params=filter_params)

    ### Stop Seg Timers
    seg_time_elapsed = time.time() - start_time   
    return WSI_object, seg_time_elapsed

def patching(WSI_object, patient_id, **kwargs):
    ### Start Patch Timer
    start_time = time.time()

    # Patch
    file_path = WSI_object.process_contours(patient_id, **kwargs)


    ### Stop Patch Timer
    patch_time_elapsed = time.time() - start_time
    return file_path, patch_time_elapsed


def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir, 
                patch_size = 256, step_size = 256, 
                seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                'keep_ids': 'none', 'exclude_ids': 'none'},
                filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8}, 
                vis_params = {'vis_level': -1, 'line_thickness': 500},
                patch_params = {'use_padding': True, 'contour_fn': 'four_pt'},
                patch_level = 0,
                use_default_params = False, 
                seg = False, save_mask = True, 
                stitch= False, 
                patch = False, auto_skip=True, process_list = None):
    


    folders = sorted(os.listdir(source))
    slides = []
    for folder in folders:
        files = os.listdir(os.path.join(source, folder))
        for file in files:
            if file.endswith('svs'):
                slides.append(os.path.join(folder, file))
    #slides = [slide for slide in slides if os.path.isfile(os.path.join(source, slide))]

    if process_list is None:
        df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params)
    
    else:
        df = pd.read_csv(process_list)
        df = initialize_df(df, seg_params, filter_params, vis_params, patch_params)

    mask = df['process'] == 1
    process_stack = df[mask]

    total = len(process_stack)

    legacy_support = 'a' in df.keys()
    if legacy_support:
        print('detected legacy segmentation csv file, legacy support enabled')
        df = df.assign(**{'a_t': np.full((len(df)), int(filter_params['a_t']), dtype=np.uint32),
        'a_h': np.full((len(df)), int(filter_params['a_h']), dtype=np.uint32),
        'max_n_holes': np.full((len(df)), int(filter_params['max_n_holes']), dtype=np.uint32),
        'line_thickness': np.full((len(df)), int(vis_params['line_thickness']), dtype=np.uint32),
        'contour_fn': np.full((len(df)), patch_params['contour_fn'])})

    seg_times = 0.
    patch_times = 0.
    stitch_times = 0.

    ###### patching start#######
    for i in range(total):
        df.to_csv(os.path.join(save_dir, 'process_list_autogen.csv'), index=False)
        idx = process_stack.index[i]
        slide = process_stack.loc[idx, 'slide_id']
        print("\n\nprogress: {:.2f}, {}/{}".format(i/total, i, total))
        print('processing {}'.format(slide))
        
        df.loc[idx, 'process'] = 0
        slide_id, _ = os.path.splitext(slide)

        if auto_skip and os.path.isfile(os.path.join(patch_save_dir, slide_id + '.h5')):
            print('{} already exist in destination location, skipped'.format(slide_id))
            df.loc[idx, 'status'] = 'already_exist'
            continue

        # Inialize WSI
        full_path = os.path.join(source, slide)
        WSI_object = WholeSlideImage(full_path)

        if use_default_params:
            current_vis_params = vis_params.copy()
            current_filter_params = filter_params.copy()
            current_seg_params = seg_params.copy()
            current_patch_params = patch_params.copy()
            
        else:
            current_vis_params = {}
            current_filter_params = {}
            current_seg_params = {}
            current_patch_params = {}


            for key in vis_params.keys():
                if legacy_support and key == 'vis_level':
                    df.loc[idx, key] = -1
                current_vis_params.update({key: df.loc[idx, key]})

            for key in filter_params.keys():
                if legacy_support and key == 'a_t':
                    old_area = df.loc[idx, 'a']
                    seg_level = df.loc[idx, 'seg_level']
                    scale = WSI_object.level_downsamples[seg_level]
                    adjusted_area = int(old_area * (scale[0] * scale[1]) / (512 * 512))
                    current_filter_params.update({key: adjusted_area})
                    df.loc[idx, key] = adjusted_area
                current_filter_params.update({key: df.loc[idx, key]})

            for key in seg_params.keys():
                if legacy_support and key == 'seg_level':
                    df.loc[idx, key] = -1
                current_seg_params.update({key: df.loc[idx, key]})

            for key in patch_params.keys():
                current_patch_params.update({key: df.loc[idx, key]})

        if current_vis_params['vis_level'] < 0:
            if len(WSI_object.level_dim) == 1:
                current_vis_params['vis_level'] = 0
            
            else:	
                wsi = WSI_object.getOpenSlide()
                best_level = wsi.get_best_level_for_downsample(64)
                current_vis_params['vis_level'] = best_level

        if current_seg_params['seg_level'] < 0:
            if len(WSI_object.level_dim) == 1:
                current_seg_params['seg_level'] = 0
            
            else:
                wsi = WSI_object.getOpenSlide()
                best_level = wsi.get_best_level_for_downsample(64)
                current_seg_params['seg_level'] = best_level

        keep_ids = str(current_seg_params['keep_ids'])
        if keep_ids != 'none' and len(keep_ids) > 0:
            str_ids = current_seg_params['keep_ids']
            current_seg_params['keep_ids'] = np.array(str_ids.split(',')).astype(int)
        else:
            current_seg_params['keep_ids'] = []

        exclude_ids = str(current_seg_params['exclude_ids'])
        if exclude_ids != 'none' and len(exclude_ids) > 0:
            str_ids = current_seg_params['exclude_ids']
            current_seg_params['exclude_ids'] = np.array(str_ids.split(',')).astype(int)
        else:
            current_seg_params['exclude_ids'] = []

        w, h = WSI_object.level_dim[current_seg_params['seg_level']] 
        if w * h > 1e8:
            print('level_dim {} x {} is likely too large for successful segmentation, aborting'.format(w, h))
            df.loc[idx, 'status'] = 'failed_seg'
            continue

        df.loc[idx, 'vis_level'] = current_vis_params['vis_level']
        df.loc[idx, 'seg_level'] = current_seg_params['seg_level']


        seg_time_elapsed = -1
        if seg:
            WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params)
            if len(WSI_object.contours_tissue)==0:
                print('failed to extract contours')
                df.loc[idx, 'status'] = 'failed_seg'
                continue

        if save_mask:
            mask = WSI_object.visWSI(**current_vis_params)
            patient_id, wsi_id = slide_id.split('/')
            os.makedirs(os.path.join(mask_save_dir, patient_id), exist_ok=True)
            mask_path = os.path.join(mask_save_dir, patient_id, wsi_id+'.jpg')
            mask.save(mask_path)

        patch_time_elapsed = -1 # Default time
        if patch:
            patient_id, wsi_id = slide_id.split('/')
            os.makedirs(os.path.join(patch_save_dir, patient_id), exist_ok=True)
            current_patch_params.update({'patch_level': patch_level, 'patch_size': patch_size, 'step_size': step_size, 
                                        'save_path': patch_save_dir})
            file_path, patch_time_elapsed = patching(WSI_object = WSI_object,  patient_id=patient_id, **current_patch_params,)
        
        stitch_time_elapsed = -1
        if stitch:
            file_path = os.path.join(patch_save_dir, slide_id+'.h5')
            if os.path.isfile(file_path):
                heatmap, stitch_time_elapsed = stitching(file_path, WSI_object, downscale=64)
                patient_id, wsi_id = slide_id.split('/')
                os.makedirs(os.path.join(stitch_save_dir, patient_id), exist_ok=True)
                stitch_path = os.path.join(stitch_save_dir, patient_id, wsi_id+'.jpg')
                heatmap.save(stitch_path)

        print("segmentation took {} seconds".format(seg_time_elapsed))
        print("patching took {} seconds".format(patch_time_elapsed))
        print("stitching took {} seconds".format(stitch_time_elapsed))
        df.loc[idx, 'status'] = 'processed'

        seg_times += seg_time_elapsed
        patch_times += patch_time_elapsed
        stitch_times += stitch_time_elapsed

    seg_times /= total
    patch_times /= total
    stitch_times /= total

    df.to_csv(os.path.join(save_dir, 'process_list_autogen.csv'), index=False)
    print("average segmentation time in s per slide: {}".format(seg_times))
    print("average patching time in s per slide: {}".format(patch_times))
    print("average stiching time in s per slide: {}".format(stitch_times))
        
    return seg_times, patch_times

In [11]:
# args
step_size = 256
patch_size = 256
patch = True
seg = True
stitch = True
no_auto_skip = True
preset = None
patch_level = None
save_dir = '/shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/'
source = '/shared/j.jang/pathai/data/TCGA-lung/'
process_list = None

In [12]:
# main
patch_save_dir = os.path.join(save_dir, 'patches')
mask_save_dir = os.path.join(save_dir, 'masks')
stitch_save_dir = os.path.join(save_dir, 'stitches')

print('source: ', source)
print('patch_save_dir: ', patch_save_dir)
print('mask_save_dir: ', mask_save_dir)
print('stitch_save_dir: ', stitch_save_dir)

directories = {'source': source, 
                'save_dir': save_dir,
                'patch_save_dir': patch_save_dir, 
                'mask_save_dir' : mask_save_dir, 
                'stitch_save_dir': stitch_save_dir} 

for key, val in directories.items():
    print("{} : {}".format(key, val))
    if key not in ['source']:
        os.makedirs(val, exist_ok=True)

seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                'keep_ids': 'none', 'exclude_ids': 'none'}
filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8}
vis_params = {'vis_level': -1, 'line_thickness': 250}
patch_params = {'use_padding': True, 'contour_fn': 'four_pt'}

parameters = {'seg_params': seg_params,
                'filter_params': filter_params,
                'patch_params': patch_params,
                'vis_params': vis_params}
print('\n', parameters, '\n')

seg_times, patch_times = seg_and_patch(**directories, **parameters,
                                        patch_size = patch_size, step_size=step_size, 
                                        seg = seg,  use_default_params=False, save_mask = True, 
                                        stitch= stitch,
                                        patch_level=patch_level, patch = patch,
                                        process_list = process_list, auto_skip=no_auto_skip)


source:  /shared/j.jang/pathai/data/TCGA-lung/
patch_save_dir:  /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/patches
mask_save_dir:  /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/masks
stitch_save_dir:  /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/stitches
source : /shared/j.jang/pathai/data/TCGA-lung/
save_dir : /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/
patch_save_dir : /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/patches
mask_save_dir : /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/masks
stitch_save_dir : /shared/js.yun/data/CLAM_data/TCGA-lung-h5-patches/stitches

 {'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}} 



progress: 0.00, 0/1053
processing 004d8238-0a74-40bd-9547-f48a2086c

Creating patches for:  TCGA-75-7030-01Z-00-DX1.5DDF24B5-00D1-4418-A067-A9B609E15314 ...
Total number of contours to process:  2


TypeError: list indices must be integers or slices, not NoneType

# Test

In [4]:
import sys
import openslide
from PIL import Image
import h5py
import numpy as np
import os
import torch
import torchvision
from openslide.lowlevel import *
from openslide.lowlevel import _read_region as _read_region
from openslide.lowlevel import _convert
import io

def read_region(self, location, level, size):
    """Return a PIL.Image containing the contents of the region.

    location: (x, y) tuple giving the top left pixel in the level 0
                reference frame.
    level:    the level number.
    size:     (width, height) tuple giving the region size.

    Unlike in the C interface, the image data returned by this
    function is not premultiplied."""
    return lowlevel_read_region(
        self._osr, location[0], location[1], level, size[0], size[1]
    )

def lowlevel_read_region(slide, x, y, level, w, h):
    if w < 0 or h < 0:
        # OpenSlide would catch this, but not before we tried to allocate
        # a negative-size buffer
        raise OpenSlideError(
            "negative width (%d) or negative height (%d) not allowed" % (w, h)
        )
    if w == 0 or h == 0:
        # PIL.Image.frombuffer() would raise an exception
        return PIL.Image.new('RGBA', (w, h))
    buf = (w * h * c_uint32)()
    _read_region(slide, buf, x, y, level, w, h)
    return load_image_as_numpy_rgb(buf, (w, h))

def load_image_as_numpy_rgb(buf, size):
    '''buf must be a mutable buffer.'''
    arr = torch.frombuffer(buf, dtype=torch.uint8).view(size[1], size[0], 4)
    # Select RGB channels, reverse the order, and transpose       이거까지 GPU에서 하는게 나을듯
    rgb_arr = arr[..., [2, 1, 0]].permute(2, 0, 1).contiguous()
    
    # return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBA', 0, 1)
    return rgb_arr



slide = openslide.open_slide('/shared/j.jang/pathai/data/TCGA-lung/00a0b174-1eab-446a-ba8c-7c6e3acd7f0c/TCGA-MN-A4N4-01Z-00-DX2.9550732D-8FB1-43D9-B094-7C0CD310E9C0.svs')
# print(slide.properties)
print(slide.associated_images)



# slide.read_region = read_region.__get__(slide)
# image_pil = slide.read_region(location=(13000,33000), level=0, size=(256, 256))
# print(type(image_pil))


# encode_image = torchvision.io.encode_jpeg(image_pil, 75)


# # HDF5 파일 생성
# file_path_pil = 'encode.hdf5'
# if os.path.exists(file_path_pil):
#     os.remove(file_path_pil)
# # file_path_jpg = 'test_jpg.hdf5'
# # if os.path.exists(file_path_jpg):
# #     os.remove(file_path_jpg)

# with h5py.File(file_path_pil, 'w') as f:
#     dset = f.create_dataset('binary_data', data=encode_image)
#     f.close()


# # HDF5 파일에서 데이터 읽어오기 (바이트로)
# with h5py.File(file_path_pil, 'r') as f:
#     binary_data = f['binary_data'][()]


# # 바이트 데이터를 JPEG로 디코딩하고 텐서로 변환
# image_pil_from_bytes = Image.open(io.BytesIO(binary_data))
# tensor_from_numpy = torch.from_numpy(binary_data)
# image_tensor = torchvision.io.decode_image(tensor_from_numpy)

# image_pil_from_bytes.show()

<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1024x742 at 0x7F7CEAFC3110>}>
