The tif images are all very large and cannot be fed directly into the neural network model for training. So I plan to cut the images into small patches for training. There is also a lot of white background in the images that should be removed.

(tif图像都很大，无法直接输入神经网络模型进行训练。所以把图片切割成小patch进行训练。图片中还有很多白色背景应该剔除。)

**Upvote if it helps (请给我一票如果有用)**

### IMPORT (导入)

In [None]:
import sys
import os
import cv2
import pandas as pd
import numpy as np
import openslide
from skimage.filters import threshold_otsu
import matplotlib.pyplot as plt

## HELPER (一些函数)

In [None]:
def get_img_path(img_id, test=False):
    """Return the path of the ``.tif`` slide for ``img_id``.

    Args:
        img_id: Image identifier used as the file stem.
        test: When truthy, point at the ``test`` folder instead of ``train``.

    Returns:
        Relative path string inside ``../input/mayo-clinic-strip-ai``.
    """
    subset = 'test' if test else 'train'
    return f'../input/mayo-clinic-strip-ai/{subset}/{img_id}.tif'

# Separate foreground from background.
def ostu(img):
    """Build a binary foreground mask from an RGB image with Otsu's threshold.

    Args:
        img: 3-channel image in RGB channel order (the notebook passes the
            result of PIL ``convert('RGB')``).

    Returns:
        ``uint8`` array with one value per pixel: 1 for foreground, 0 for
        background.
    """
    # The input is RGB, so use RGB2GRAY; BGR2GRAY would apply the red and
    # blue luminance weights to the wrong channels.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    thre = threshold_otsu(gray)
    # Inverted binary threshold: pixels at or below the Otsu level become 255,
    # brighter pixels (the white background) become 0.
    ret1, th = cv2.threshold(gray, thre, 255, cv2.THRESH_BINARY_INV)
    # Morphological closing with a 5x5 rectangle fills small holes in the mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    th = cv2.morphologyEx(th.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
    th[th > 0] = 1
    # Pure-black pixels are excluded from the foreground.
    th[gray == 0] = 0
    return th

# Split a 2-D image into non-overlapping size x size patches with np.reshape.
def gray2patch(img, size=256):
    """Cut a single-channel image into a grid of square patches.

    Rows and columns at the bottom/right edge that do not fill a whole patch
    are dropped.

    Args:
        img: 2-D array of shape ``(h, w)``.
        size: Side length of each square patch in pixels.

    Returns:
        Array of shape ``(h // size, w // size, size, size)`` where
        ``result[i, j]`` is the patch at row-block ``i`` and column-block
        ``j``. If the image is smaller than ``size`` along an axis, that grid
        dimension is 0 (an empty result) instead of raising
        ``ZeroDivisionError``.
    """
    h, w = img.shape
    d_h, d_w = h // size, w // size
    # Crop to a whole number of patches so the reshape below is exact.
    img = img[:d_h * size, :d_w * size]
    # (d_h, size, d_w, size) -> (d_h, d_w, size, size): bring the two grid
    # axes to the front so each [i, j] entry is one patch.
    return img.reshape((d_h, size, d_w, size)).transpose([0, 2, 1, 3])
def img2patch(img, patch_size=256):
    """Cut a 3-channel image into a grid of square patches.

    Each of the first three channels is split with ``gray2patch`` and the
    results are stacked back along a new last axis.

    Args:
        img: Array of shape ``(h, w, c)`` with at least three channels.
        patch_size: Side length of each square patch in pixels.

    Returns:
        Array of shape ``(rows, cols, patch_size, patch_size, 3)``.
    """
    # Forward patch_size: it used to be ignored, so every call silently used
    # gray2patch's default of 256 regardless of the argument.
    channels = [gray2patch(img[:, :, c], patch_size) for c in range(3)]
    return np.stack(channels, axis=-1)

# Check whether the share of foreground (=1) pixels in the binary mask
# exceeds the threshold.
def check_msk(msk, threshold=0.6):
    """Return True when the foreground fraction of ``msk`` is above ``threshold``.

    Args:
        msk: Binary mask array whose foreground pixels equal 1.
        threshold: Foreground fraction that must be strictly exceeded.

    Returns:
        ``True`` if ``msk.sum() / msk.size > threshold``, otherwise ``False``.
    """
    foreground_ratio = msk.sum() / msk.size
    return bool(foreground_ratio > threshold)

In [None]:
# Load the training metadata table.
df_train = pd.read_csv(
    filepath_or_buffer='../input/mayo-clinic-strip-ai/train.csv',
)

# Show an example (展示一个例子)

Split the image into patches with np.reshape (把图片用reshape切割为patch)

In [None]:
# Use the image_id stored under row label 4 as the worked example.
img_id = df_train.loc[4, 'image_id']

In [None]:
# Read the full level-0 region of the slide, convert it to RGB and swap the
# first two array axes.
with openslide.OpenSlide(get_img_path(img_id)) as wsi:
    img_RGB = np.array(
        wsi.read_region((0, 0), 0, wsi.level_dimensions[0]).convert('RGB')
    ).transpose(1, 0, 2)
# Foreground mask of the whole slide, shown in grayscale.
th = ostu(img_RGB)
plt.imshow(th, cmap='gray')

In [None]:
# Cut the mask into patches and preview the one at grid position (35, 25).
msk_patches = gray2patch(th)
plt.imshow(msk_patches[35, 25], cmap='gray')

In [None]:
# Cut the RGB image into patches and preview the one at grid position (35, 25).
img_patches = img2patch(img_RGB)
plt.imshow(img_patches[35, 25])

In [None]:
# Save every patch whose mask contains enough foreground as a .npy file
# under ./train, numbering the kept patches consecutively.
os.makedirs('train', exist_ok=True)
h, w = img_patches.shape[:2]
count = 0
for i, j in np.ndindex(h, w):
    # Skip patches whose mask does not meet the foreground threshold.
    if not check_msk(msk_patches[i, j], threshold=0.6):
        continue
    np.save(f'./train/{img_id}_{count}.npy', img_patches[i, j])
    count += 1
print(f'Image {img_id} convert to {count} patch.')

In [None]:
# Reload the first patch saved for the current image and display it.
# The file name is built from img_id so it matches the naming used when the
# patches were saved, rather than being hard-coded for one slide.
patch = np.load(f'./train/{img_id}_0.npy')
plt.imshow(patch)