In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# import warnings
# warnings.filterwarnings("ignore", category=DeprecationWarning);

# Show which Python interpreter (and therefore which environment) the kernel runs on.
import sys
sys.executable

'/home/mingrui/anaconda3/envs/py36_cu90/bin/python'

In [2]:
import contextlib
import glob
import os
import shutil
from random import shuffle

# plotting
import PIL
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from IPython.display import Image
plt.rcParams['figure.figsize'] = [15, 15]

import deephistopath.wsi.slide as slide

import py_wsi
import py_wsi.imagepy_toolkit as tk

from wsi_utils import multiprocessing_pipeline, file_stats, turtle_stats

import pandas as pd
pd.set_option('display.max_columns',None)

# Set up the tumor classes

In [3]:
# One entry per tumor class. The FIRST name of an entry is the source folder
# under SYNOLOGY_DIR, the LAST name is the destination folder under
# PATH_NET_DIR (a single-name entry uses the same name for both).
tumor_folder_names = [
    ['TCGA_Glioma'],
    ['Meningiomas', 'CGGA_Meningioma'],
    ['TCGA_Lung'],
    ['TCGA_Kidney'],
    ['TCGA_Breast'],
    ['TCGA_Uterus'],
    ['TCGA_Colorectal'],
    ['TCGA_WSI_glioma', 'TCGA_Glioma_Frozen'],
    ['Ependymoma_Craniopharyngioma', 'CGGA_EC'],
]
# Filled below with [source_dir, db_dir, db_name] triples, one per tumor class.
tumor_type_list = []

SYNOLOGY_DIR = '/media/pathimg/'
#PATH_NET_DIR = '/media/disk2/PathologyNet'
PATH_NET_DIR = './data/'

for tumor in tumor_folder_names:
    # use first as origin folder name
    tumor_dir = os.path.join(SYNOLOGY_DIR, tumor[0])
    # use last as destination folder name
    db_dir = os.path.join(PATH_NET_DIR, tumor[-1])
    # name db with destination folder name
    db_name = tumor[-1] + '_patch_db'
    # WARNING: destructive. Any existing destination folder is wiped so every
    # run of this cell starts from an empty db directory.
    if os.path.exists(db_dir):
        shutil.rmtree(db_dir)
    # makedirs (not mkdir) so a missing PATH_NET_DIR parent is created as well
    # instead of raising FileNotFoundError.
    os.makedirs(db_dir)
    tumor_type_list.append([tumor_dir, db_dir, db_name])

print(tumor_type_list)

[['/media/pathimg/TCGA_Glioma', './data/TCGA_Glioma', 'TCGA_Glioma_patch_db'], ['/media/pathimg/Meningiomas', './data/CGGA_Meningioma', 'CGGA_Meningioma_patch_db'], ['/media/pathimg/TCGA_Lung', './data/TCGA_Lung', 'TCGA_Lung_patch_db'], ['/media/pathimg/TCGA_Kidney', './data/TCGA_Kidney', 'TCGA_Kidney_patch_db'], ['/media/pathimg/TCGA_Breast', './data/TCGA_Breast', 'TCGA_Breast_patch_db'], ['/media/pathimg/TCGA_Uterus', './data/TCGA_Uterus', 'TCGA_Uterus_patch_db'], ['/media/pathimg/TCGA_Colorectal', './data/TCGA_Colorectal', 'TCGA_Colorectal_patch_db'], ['/media/pathimg/TCGA_WSI_glioma', './data/TCGA_Glioma_Frozen', 'TCGA_Glioma_Frozen_patch_db'], ['/media/pathimg/Ependymoma_Craniopharyngioma', './data/CGGA_EC', 'CGGA_EC_patch_db']]


In [4]:
def process_tumor_type(tumor_dir, db_dir, db_name, num_to_process,
                       tile_size=1024, zoom_level=0, num_tiles=50):
    """Run the tiling pipeline on the first slides of one tumor type.

    Builds a ``py_wsi.Turtle`` for the given folders, prints its statistics via
    ``turtle_stats`` and hands the first ``num_to_process`` slides to
    ``multiprocessing_pipeline``.

    Args:
        tumor_dir: Source folder, passed to ``py_wsi.Turtle`` and the pipeline.
        db_dir: Destination folder, passed to ``py_wsi.Turtle`` and the pipeline.
        db_name: Database name passed to ``py_wsi.Turtle``.
        num_to_process: Upper bound on the number of slides to process; it is
            used as a slice end, so ``None`` selects every slide.
        tile_size: Forwarded unchanged to ``multiprocessing_pipeline``.
        zoom_level: Forwarded unchanged to ``multiprocessing_pipeline``.
        num_tiles: Forwarded unchanged to ``multiprocessing_pipeline``.
    """
    turtle = py_wsi.Turtle(tumor_dir, db_dir, db_name)
    turtle_stats(turtle)

    # Paths and names are sliced identically so the two lists have equal length.
    files_path = turtle.files_path[:num_to_process]
    files_name = turtle.files[:num_to_process]

    multiprocessing_pipeline(tumor_dir, db_dir, files_path, files_name,
                             tile_size=tile_size, zoom_level=zoom_level, num_tiles=num_tiles)

In [7]:
# Number of slides to tile per tumor type.
NUM_SLIDES_PER_TYPE = 28

# Stream everything printed during processing straight into output.txt.
# `%%capture cap` cannot be used for this from inside the same cell: IPython
# binds `cap` only after the cell has finished, so reading `cap.stdout` here
# raises NameError on a fresh kernel and yields the previous run's output on a
# re-run. Note that this cell therefore no longer defines `cap`.
with open('output.txt', 'w') as log_file, contextlib.redirect_stdout(log_file):
    # flush=True keeps output.txt current while the long-running loop is going.
    print('start', flush=True)
    for tumor_type in tumor_type_list:
        process_tumor_type(tumor_type[0], tumor_type[1], tumor_type[2], NUM_SLIDES_PER_TYPE)
        print('finished processing', tumor_type[0], flush=True)

# Move files to training folders

In [17]:
%%capture cap
for tumor_type in tumor_type_list:
    tumor_name = tumor_type[2][:-9]
    tumor_db_dir = tumor_type[1]

    tumor_tile_dir = os.path.join(tumor_db_dir, 'tiles_png')

    CLASSIFICATION_DATA_DIR = './data/multiclass_data'
    train_dir = os.path.join(CLASSIFICATION_DATA_DIR, 'train')
    val_dir = os.path.join(CLASSIFICATION_DATA_DIR, 'val')

    train_tumor_dir = os.path.join(train_dir, tumor_name)
    val_tumor_dir = os.path.join(val_dir, tumor_name)

    if not os.path.exists(train_tumor_dir):
        os.mkdir(train_tumor_dir)

    if not os.path.exists(val_tumor_dir):
        os.mkdir(val_tumor_dir)

    all_tumor_png = glob.glob(os.path.join(tumor_tile_dir, '*', '*'), recursive=True)
    len(all_tumor_png)
    shuffle(all_tumor_png)
    
    for img in all_tumor_png[:1120]:
        shutil.move(img, train_tumor_dir)
    for img in all_tumor_png[1120:]:
        shutil.move(img, val_tumor_dir)