# Patch Generator

In [1]:
from utils import getLabel, getWSI, getRegionFromSlide
import numpy as np
import openslide
import cv2
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from itertools import cycle
import json

In [11]:
def patch_generator(folder, all_patch_list, 
                    det_patch_list, batch_size=64, 
                    detection_ratio=0.5, levels=(0, 1, 2),
                    dims=(512, 512)):
    '''
    Endlessly yield shuffled batches of image patches and their ground-truth masks.

    Each batch mixes two sources:
      * up to ``all_batch_size`` consecutive entries of the (re-shuffled)
        ``all_patch_list`` -- the last slice of a pass may be shorter;
      * ``true_batch`` entries drawn cyclically from the (re-shuffled)
        ``det_patch_list``.
    where ``true_batch = int(detection_ratio * batch_size) + 1`` and
    ``all_batch_size = batch_size - true_batch``.

    For every sample one entry of ``levels`` is picked uniformly at random and
    both the image patch and its label are read at that level.

    Args:
        folder: Prefix that is string-concatenated with each sample's file name
            (so it needs its trailing separator).
        all_patch_list: Sequence of ``[file_name, coords]`` entries.
        det_patch_list: Sequence of ``[file_name, coords]`` entries that are
            over-sampled into every batch.
        batch_size: Nominal number of samples per batch.
        detection_ratio: Fraction of the batch taken from ``det_patch_list``
            (before the ``+ 1`` described above).
        levels: Candidate levels; one is chosen per sample.
        dims: Patch size forwarded to ``getRegionFromSlide`` and ``getLabel``.

    Yields:
        The result of ``sklearn.utils.shuffle(X, y)`` where ``X`` is the array
        of patches and ``y`` the array of labels, shuffled in unison.

    Raises:
        ValueError: On the first ``next()`` call, if ``all_patch_list`` is empty
            or if ``batch_size``/``detection_ratio`` leave no room for samples
            from ``all_patch_list``. Both cases previously made the generator
            spin forever without yielding anything.
    '''
    true_batch = int(detection_ratio * batch_size) + 1
    all_batch_size = batch_size - true_batch

    # A non-positive stride would make the inner range() empty (or invalid),
    # so the outer infinite loop would never yield: fail loudly instead.
    if all_batch_size < 1:
        raise ValueError(
            'batch_size={} with detection_ratio={} leaves no room for samples from '
            'all_patch_list (true_batch_size={})'.format(batch_size, detection_ratio, true_batch))
    # len() rather than truthiness so array-like inputs are accepted too.
    if len(all_patch_list) == 0:
        raise ValueError('all_patch_list must not be empty')

    print('true_batch_size: {} \t all_batch_size: {}'.format(true_batch, all_batch_size))

    while True:
        # New pass: reshuffle both lists and restart the detection cycle.
        all_patch_list = shuffle(all_patch_list)
        det_patch_list = shuffle(det_patch_list)

        det_patch_list_cycle = cycle(det_patch_list)

        for offset in range(0, len(all_patch_list), all_batch_size):

            ## Get file and coords list from each patch list and combine them
            all_samples = all_patch_list[offset:offset + all_batch_size]
            # zip() against a range takes exactly true_batch items from the
            # cycle without consuming an extra one.
            true_sample = [sample for _, sample in zip(range(true_batch), det_patch_list_cycle)]

            combined_sample_list = shuffle(list(all_samples) + true_sample)

            patch = []
            ground_truth = []

            for sample in combined_sample_list:
                filename = folder + sample[0]
                coords = sample[1]
                level = levels[np.random.randint(0, len(levels), dtype=np.int8)]
                # NOTE(review): getWSI() is called once per sample and its result
                # is never explicitly released here -- confirm whether the helper
                # caches/closes slide handles.
                patch.append(getRegionFromSlide(getWSI(filename), level=level, start_coord=coords, dims=dims))

                ground_truth.append(getLabel(filename, level, coords, dims))

                print('Level used: {}'.format(level))

            X_train = np.array(patch)
            y_train = np.array(ground_truth)

            yield shuffle(X_train, y_train)

## Test patch generation

In [3]:
# Read the JSON file and keep only the entries stored under its 'list' key.
with open('all_patch_list.json', mode='rb') as f:
    all_patch_list = json.loads(f.read())['list']
    

In [4]:
# Read the detections JSON file and keep only the entries under its 'list' key.
with open('./detections_patch_list.json', mode='rb') as f:
    detections_patch_list = json.loads(f.read())['list']

In [5]:
# Shuffle each list, then keep its first ten entries as a small test subset.
# The two shuffle() calls stay in their original order (all, then detections).
all_patch_list = shuffle(all_patch_list)
detections_patch_list = shuffle(detections_patch_list)

all_patch_list_short = all_patch_list[0:10]
detections_patch_list_short = detections_patch_list[0:10]

In [12]:
# Smoke test: draw batches from the generator, print per-sample statistics for
# the first `batches` of them, then stop (the generator itself never ends).
count = 0
batches = 5
for count, (X_train, y_train) in enumerate(
        patch_generator(folder='/home/mak/PathAI/slides/',
                        all_patch_list=all_patch_list_short,
                        det_patch_list=detections_patch_list_short,
                        batch_size=4)):
    if count >= batches:
        print('completed')
        break
    print('---------------Count: {} -----------------\nX_train avg/shape: {} {}\ny_train sum/shape: {} {}'.format(
        count,
        np.average(X_train, axis=(1, 2, 3)),
        X_train.shape,
        np.sum(y_train, axis=(1, 2, 3)),
        y_train.shape,
    ))
    

true_batch_size: 3 	 all_batch_size: 1
Level used: 0
Level used: 0
Level used: 0
Level used: 1
---------------Count: 0 -----------------
X_train avg/shape: [203.44775391 153.75242488 233.09274801 202.17811457] (4, 512, 512, 3)
y_train sum/shape: [262144. 262144.      0. 262144.] (4, 512, 512, 1)
Level used: 1
Level used: 2
Level used: 2
Level used: 1
---------------Count: 1 -----------------
X_train avg/shape: [232.49019877 151.93796031 188.54871496 190.37866211] (4, 512, 512, 3)
y_train sum/shape: [     0. 262144.   9407. 259848.] (4, 512, 512, 1)
Level used: 1
Level used: 1
Level used: 0
Level used: 2
---------------Count: 2 -----------------
X_train avg/shape: [162.32260386 182.96707662 216.46680832 160.92891439] (4, 512, 512, 3)
y_train sum/shape: [262144. 254539.      0. 262144.] (4, 512, 512, 1)
Level used: 0
Level used: 0
Level used: 0
Level used: 2
---------------Count: 3 -----------------
X_train avg/shape: [203.87278493 203.44775391 224.19379807 147.44900386] (4, 512, 512, 3)

In [7]:
all_patch_list_short

[['patient_046_node_2.tif', [38656, 9216]],
 ['patient_063_node_2.tif', [71238, 133251]],
 ['patient_046_node_0.tif', [84224, 8448]],
 ['patient_051_node_3.tif', [15872, 37376]],
 ['patient_043_node_1.tif', [22336, 1664]],
 ['patient_054_node_3.tif', [77312, 79616]],
 ['patient_047_node_2.tif', [58752, 20480]],
 ['patient_014_node_1.tif', [67907, 100451]],
 ['patient_047_node_2.tif', [68480, 60672]],
 ['patient_049_node_3.tif', [46592, 13568]]]