In [None]:
import glob
import numpy as np
import cv2

In [None]:
# reading data
# glob returns matches in arbitrary (filesystem-dependent) order, so every
# listing is sorted to keep the image/label order reproducible between runs.
patches = sorted(glob.glob('./data/**/*.png', recursive=True))
# Paths whose file name ends in "class0.png" / "class1.png" respectively.
class0 = sorted(glob.glob('./data/**/*class0.png', recursive=True))
class1 = sorted(glob.glob('./data/**/*class1.png', recursive=True))

In [None]:
def prepare_data_pad(image_paths=None, negative_paths=None):
    """ divides data into images and labels
        pads images that are smaller than 50x50 with white (note: this isn't zero-padding,
        decided against black, because cancer parts of the images are dark)
        divides all pixel values by 255.0
    Args:
        image_paths : iterable of image file paths to load
        (defaults to the notebook-level `patches` list)
        negative_paths : collection of paths that get label 0; every other
        path gets label 1 (defaults to the notebook-level `class0` list)
    Returns:
        scaled_X : array with padded images
        y : list with corresponding labels
        (0 for non-IDC, 1 for IDC)
    Raises:
        ValueError : if an image cannot be read, or is larger than 50x50
        and therefore cannot be padded up to the target size
    """
    WIDTH = 50
    HEIGHT = 50
    if image_paths is None:
        image_paths = patches
    if negative_paths is None:
        negative_paths = class0
    # Membership tests against a list are linear, which made labelling quadratic
    # in the number of images; a set gives constant-time lookups.
    negatives = set(negative_paths)

    X = []
    y = []
    for img_name in image_paths:
        image = cv2.imread(img_name)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise ValueError('could not read image: {}'.format(img_name))
        orig_height, orig_width = image.shape[:2]
        height_diff = HEIGHT - orig_height
        width_diff = WIDTH - orig_width
        if height_diff < 0 or width_diff < 0:
            # Negative border sizes are not valid padding amounts.
            raise ValueError(
                'image {} is {}x{}, larger than the {}x{} target'.format(
                    img_name, orig_width, orig_height, WIDTH, HEIGHT))
        if height_diff or width_diff:
            # Split the missing rows/columns as evenly as possible; when the
            # difference is odd the extra pixel goes to the bottom/right.
            top = height_diff // 2  # floor division
            bottom = height_diff - top
            left = width_diff // 2
            right = width_diff - left
            image = cv2.copyMakeBorder(image, top, bottom, left, right,
                                       cv2.BORDER_CONSTANT, value=[255, 255, 255])
        X.append(image)
        y.append(0 if img_name in negatives else 1)
    scaled_X = np.array(X) / 255.0
    return scaled_X, y

In [None]:
def prepare_data_resize(image_paths=None, negative_paths=None):
    """ divide initial data into images and labels
        Resizes every image to 50x50 with bicubic interpolation
        Divides all pixel values by 255.0
    Args:
        image_paths : iterable of image file paths to load
        (defaults to the notebook-level `patches` list)
        negative_paths : collection of paths that get label 0; every other
        path gets label 1 (defaults to the notebook-level `class0` list)
    Returns:
        scaled_X : array with resized images
        y : list with corresponding labels
        (0: non-IDC, 1: IDC)
    Raises:
        ValueError : if an image cannot be read
    """
    WIDTH = 50
    HEIGHT = 50
    if image_paths is None:
        image_paths = patches
    if negative_paths is None:
        negative_paths = class0
    # Membership tests against a list are linear, which made labelling quadratic
    # in the number of images; a set gives constant-time lookups.
    negatives = set(negative_paths)

    X = []
    y = []
    for img_name in image_paths:
        image = cv2.imread(img_name)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise ValueError('could not read image: {}'.format(img_name))
        image_resized = cv2.resize(image, (WIDTH, HEIGHT), interpolation=cv2.INTER_CUBIC)
        X.append(image_resized)
        y.append(0 if img_name in negatives else 1)
    scaled_X = np.array(X) / 255.0
    return scaled_X, y

In [None]:
# Build both image variants. The two functions label the same paths with the
# same rule, so only the labels from the first call are kept.
X_pad, y = prepare_data_pad()
X_inter, _ = prepare_data_resize()

# Write each result to its own .npy file in the working directory.
for out_name, out_array in (
    ('padded_imgs.npy', X_pad),
    ('interpolated_imgs.npy', X_inter),
    ('labels.npy', y),
):
    np.save(out_name, out_array)