In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
import glob
import os

import cv2
import numpy as np

In [3]:
# Lower / upper bounds of the random multiplier used by the hematoxylin-eosin
# colour augmentation; process_image() passes them as `low` and `high` to
# hematoxylin_eosin_aug().
COLOR_LO = 0.7
COLOR_HI = 1.3

def get_file_name_dir(rootDir, ext):
    """
    Lists the files in a directory that have a given extension.

    Uses the public `glob.glob` API rather than the undocumented `glob.glob1`
    helper. The directory part is escaped so that characters such as `[` or
    `*` in `rootDir` are taken literally.

    # Arguments
        rootDir: Directory to search (not recursive). An empty string means
            the current working directory.
        ext: File extension without the leading dot, e.g. "png".
    # Returns
        List of bare file names (no directory part) matching "*.<ext>".
        Hidden files (names starting with ".") are not included. The list is
        empty when the directory does not exist or nothing matches.
    """
    pattern = os.path.join(glob.escape(rootDir), "*." + ext)
    return [os.path.basename(path) for path in glob.glob(pattern)]



def normalize_staining(img):
    """
    Adopted from "Classification of breast cancer histology images using Convolutional Neural Networks",
    Teresa Araújo , Guilherme Aresta, Eduardo Castro, José Rouco, Paulo Aguiar, Catarina Eloy, António Polónia,
    Aurélio Campilho. https://doi.org/10.1371/journal.pone.0177544

    Performs staining normalization.

    The image is converted to optical density (OD), two stain directions are
    estimated from the pixels whose OD is at least `beta` in every channel,
    per-pixel stain concentrations are solved by least squares, rescaled to
    the reference maxima `maxCRef`, and the image is rebuilt with the
    reference stain matrix `HERef`.

    # Arguments
        img: Numpy image array of shape (height, width, 3).
    # Returns
        Normalized Numpy image array (uint8, same shape as `img`). When fewer
        than two pixels pass the OD threshold the stain directions cannot be
        estimated and a uint8 copy of the input is returned instead.
    """
    Io = 240
    beta = 0.15
    alpha = 1
    HERef = np.array([[0.5626, 0.2159],
                      [0.7201, 0.8012],
                      [0.4062, 0.5581]])
    maxCRef = np.array([1.9705, 1.0308])

    h, w, c = img.shape
    pixels = img.reshape(h * w, c)
    # Widen to uint16 so that "+ 1" cannot overflow at 255.
    OD = -np.log((pixels.astype("uint16") + 1) / Io)
    ODhat = OD[(OD >= beta).all(axis=1)]

    # The covariance below needs at least two samples; with fewer it is NaN
    # and the eigen-decomposition would raise. Leave such patches unchanged.
    if ODhat.shape[0] < 2:
        return img.clip(0, 255).astype("uint8")

    # The covariance matrix is symmetric, so use eigh: it returns real
    # eigenvalues in ascending order, whereas eig guarantees no ordering.
    # Keep the eigenvectors of the two largest eigenvalues. Their sign is
    # irrelevant: flipping either column yields the same vMin / vMax pair.
    _, V = np.linalg.eigh(np.cov(ODhat, rowvar=False))
    Vec = V[:, -2:]

    That = np.dot(ODhat, Vec)
    phi = np.arctan2(That[:, 1], That[:, 0])
    minPhi = np.percentile(phi, alpha)
    maxPhi = np.percentile(phi, 100 - alpha)
    vMin = np.dot(Vec, np.array([np.cos(minPhi), np.sin(minPhi)]))
    vMax = np.dot(Vec, np.array([np.cos(maxPhi), np.sin(maxPhi)]))
    # Order the two stain vectors so that the one with the larger first
    # component comes first.
    if vMin[0] > vMax[0]:
        HE = np.array([vMin, vMax])
    else:
        HE = np.array([vMax, vMin])

    HE = HE.T
    Y = OD.T

    # rcond=None selects NumPy's current default explicitly and avoids the
    # FutureWarning raised by NumPy 1.x when rcond is omitted.
    C = np.linalg.lstsq(HE, Y, rcond=None)[0]
    maxC = np.percentile(C, 99, axis=1)

    C = C / maxC[:, None]
    C = C * maxCRef[:, None]
    Inorm = Io * np.exp(-np.dot(HERef, C))
    Inorm = Inorm.T.reshape(h, w, c).clip(0, 255).astype("uint8")

    return Inorm


def hematoxylin_eosin_aug(img, low=0.7, high=1.3, seed=None):
    """
    Random hematoxylin-eosin augmentation, based on
    "Quantification of histochemical staining by color deconvolution"
    Arnout C. Ruifrok, Ph.D. and Dennis A. Johnston, Ph.D.
    http://www.math-info.univ-paris5.fr/~lomn/Data/2017/Color/Quantification_of_histochemical_staining.pdf

    The image is mapped to base-10 optical density, multiplied by the matrix
    `D`, multiplied back by the matrix `M`, and the first two columns of the
    result are scaled by two independent random factors before converting
    back to intensities.

    # Arguments
        img: Numpy image array of shape (height, width, 3).
        low: Low boundary for augmentation multiplier
        high: High boundary for augmentation multiplier
        seed: Seed handed to `np.random.RandomState`; None draws a
            non-reproducible pair of multipliers.
    # Returns
        Augmented Numpy image array (uint8, same shape as `img`).
    """
    reference_intensity = 240
    to_stains = np.array([[1.88, -0.07, -0.60],
                          [-1.02, 1.13, -0.48],
                          [-0.55, -0.13, 1.57]])
    from_stains = np.array([[0.65, 0.70, 0.29],
                            [0.07, 0.99, 0.11],
                            [0.27, 0.57, 0.78]])

    # Two random multipliers; the third column keeps a factor of exactly 1.
    factors = np.ones(3)
    factors[:2] = np.random.RandomState(seed).uniform(low=low, high=high, size=2)

    rows, cols, channels = img.shape
    # uint16 keeps "+ 1" from wrapping around at 255.
    density = -np.log10((img.astype("uint16") + 1) / reference_intensity)
    flat_density = density.reshape(rows * cols, channels)
    stains = np.dot(to_stains, flat_density.T).T

    # NOTE(review): the random factors multiply the columns of the recombined
    # array (after the product with `from_stains`), not the columns of
    # `stains` - confirm this is the intended place to apply them.
    density_aug = np.dot(stains, from_stains) * factors

    # Invert the log10 transform: 10 ** (-x) written as exp(-x * ln 10).
    intensity = reference_intensity * np.exp(-density_aug * np.log(10)) - 1
    return intensity.reshape(rows, cols, channels).clip(0, 255).astype("uint8")

def process_image(image_file):
    """
    Applies stain normalization followed by random hematoxylin-eosin
    augmentation to one image.

    # Arguments
        image_file: Image array in OpenCV's BGR channel order. Despite the
            name, this is the decoded image, not a file path.
    # Returns
        Processed image array in RGB channel order; it is not converted back
        to BGR.
    """
    rgb = cv2.cvtColor(image_file, cv2.COLOR_BGR2RGB)
    return hematoxylin_eosin_aug(normalize_staining(rgb), low=COLOR_LO, high=COLOR_HI)

In [103]:
input_path = '02.Patched-data/train_patched/Normal/'
input_extension = 'png'

inputs_files = sorted(get_file_name_dir(input_path, input_extension))

# WARNING: every file is overwritten in place with its processed version, and
# the augmentation is random. Re-running this cell processes the already
# processed images again, so run it exactly once per directory.
for cont, file_name in enumerate(inputs_files, start=1):
    print('Preprocessing: ' + file_name + ': ' + str(cont))

    img = os.path.join(input_path, file_name)
    im_in = cv2.imread(img)
    # cv2.imread signals failure by returning None instead of raising.
    if im_in is None:
        raise IOError('Could not read image: ' + img)

    image = process_image(im_in)

    # process_image() returns RGB, but cv2.imwrite() interprets its input as
    # BGR; convert back so red and blue are not swapped in the saved file.
    if not cv2.imwrite(img, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)):
        raise IOError('Could not write image: ' + img)

Preprocessing: n091_patch003.png: 1
Preprocessing: n091_patch004.png: 2
Preprocessing: n091_patch005.png: 3
Preprocessing: n091_patch006.png: 4
Preprocessing: n091_patch007.png: 5
Preprocessing: n091_patch008.png: 6
Preprocessing: n091_patch009.png: 7
Preprocessing: n091_patch010.png: 8
Preprocessing: n091_patch011.png: 9
Preprocessing: n091_patch012.png: 10
Preprocessing: n091_patch013.png: 11
Preprocessing: n091_patch014.png: 12
Preprocessing: n091_patch015.png: 13
Preprocessing: n091_patch016.png: 14
Preprocessing: n091_patch017.png: 15
Preprocessing: n091_patch018.png: 16
Preprocessing: n091_patch019.png: 17
Preprocessing: n091_patch020.png: 18
Preprocessing: n091_patch021.png: 19
Preprocessing: n091_patch022.png: 20
Preprocessing: n091_patch023.png: 21
Preprocessing: n091_patch024.png: 22
Preprocessing: n091_patch025.png: 23
Preprocessing: n091_patch026.png: 24
Preprocessing: n091_patch027.png: 25
Preprocessing: n091_patch028.png: 26
Preprocessing: n091_patch029.png: 27
Preprocess

Preprocessing: n098_patch012.png: 220
Preprocessing: n098_patch013.png: 221
Preprocessing: n098_patch014.png: 222
Preprocessing: n098_patch015.png: 223
Preprocessing: n098_patch016.png: 224
Preprocessing: n098_patch017.png: 225
Preprocessing: n098_patch018.png: 226
Preprocessing: n098_patch019.png: 227
Preprocessing: n098_patch020.png: 228
Preprocessing: n098_patch021.png: 229
Preprocessing: n098_patch022.png: 230
Preprocessing: n098_patch023.png: 231
Preprocessing: n098_patch024.png: 232
Preprocessing: n098_patch025.png: 233
Preprocessing: n098_patch026.png: 234
Preprocessing: n098_patch027.png: 235
Preprocessing: n098_patch028.png: 236
Preprocessing: n098_patch029.png: 237
Preprocessing: n098_patch030.png: 238
Preprocessing: n098_patch031.png: 239
Preprocessing: n098_patch032.png: 240
Preprocessing: n098_patch033.png: 241
Preprocessing: n098_patch034.png: 242
Preprocessing: n098_patch035.png: 243
Preprocessing: n099_patch001.png: 244
Preprocessing: n099_patch002.png: 245
Preprocessin

In [6]:
import os

data_path = '02.Patched-data/'

# Print each directory under data_path together with the number of files it
# directly contains (sub-directories are listed on their own lines).
for current_dir, _subdirs, file_names in os.walk(data_path):
    print(current_dir, len(file_names))

02.Patched-data/ 0
02.Patched-data/validation_patched 0
02.Patched-data/validation_patched/InSitu 700
02.Patched-data/validation_patched/Invasive 700
02.Patched-data/validation_patched/Normal 700
02.Patched-data/validation_patched/Benign 700
02.Patched-data/test_patched 0
02.Patched-data/test_patched/InSitu 700
02.Patched-data/test_patched/Invasive 700
02.Patched-data/test_patched/Normal 700
02.Patched-data/test_patched/Benign 700
02.Patched-data/train_patched 0
02.Patched-data/train_patched/InSitu 2100
02.Patched-data/train_patched/Invasive 2100
02.Patched-data/train_patched/Normal 2100
02.Patched-data/train_patched/Benign 2100
