# Libraries

In [None]:
import os 

os.environ["CUDA_VISIBLE_DEVICES"] = str(0);

import tensorflow as tf
import numpy as np
import pandas as pd
import json
import h5py

import cv2

from tqdm import tqdm

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from keras.callbacks import EarlyStopping, TensorBoard


from sklearn.model_selection import train_test_split

# Functions to read the data and compute its masks

In [None]:
# Variable-length string dtype for h5py datasets.
# NOTE(review): not referenced by any function visible in this section.
string_h5py = h5py.special_dtype(vlen=str)

def createH5PYImages():
    """Write every labelled image found under ``path`` into an HDF5 file.

    Walks the module-level ``path`` directory. For each ``*.json`` metadata
    file, the sibling ``.jpg`` image is loaded, resized to 500x500 and stored
    in ``../data/dataset.hdf5`` under either the ``malignant_images`` or the
    ``benign_images`` group, depending on the metadata field
    ``meta.clinical.benign_malignant``. Any value other than ``"malignant"``
    is stored as benign.

    The output file is opened in ``"w"`` mode, so an existing file is
    overwritten. Dataset names are the image file names, so two images with
    the same name in different sub-directories will collide.
    """
    with h5py.File("../data/dataset.hdf5", "w") as hdf:
        benign_group = hdf.create_group("benign_images")
        malignant_group = hdf.create_group("malignant_images")

        for r, d, f in os.walk(path):
            for file in tqdm(f):
                # Match on the extension only; a substring test would also
                # accept names such as "x.json.bak" and build a bad image path.
                if not file.endswith('.json'):
                    continue

                paths = os.path.join(r, file)
                with open(paths) as json_file:
                    data_json = json.load(json_file)
                label = data_json["meta"]["clinical"]["benign_malignant"]

                # Load the image the same way readImages() does; the previous
                # `image.load_img` call referenced a module that is never
                # imported in this file.
                data = mpimg.imread(paths[:-4] + "jpg")
                data = cv2.resize(data, (500, 500))

                group = malignant_group if label == "malignant" else benign_group
                group.create_dataset(file[:-4] + "jpg", data=data)
                            
                            
def createMaskedDB(benign_images, malignant_images):
    """Append the masked images to the existing ``../data/dataset.hdf5`` file.

    Two new groups are created, ``benign_masked`` and ``malignant_masked``.
    Each image is stored as a dataset named ``benign_<index>`` or
    ``malignant_<index>``, where ``<index>`` is its position in the
    corresponding input sequence.

    Args:
        benign_images: indexable sequence of masked benign images.
        malignant_images: indexable sequence of masked malignant images.
    """
    with h5py.File("../data/dataset.hdf5", "r+") as store:
        # Both groups are created up front, before any dataset is written.
        targets = (
            (store.create_group("benign_masked"), "benign_", benign_images),
            (store.create_group("malignant_masked"), "malignant_", malignant_images),
        )

        for group, prefix, pictures in targets:
            for idx in tqdm(range(len(pictures))):
                group.create_dataset(prefix + str(idx), data=pictures[idx])
            

def applyMask(mask, images):
    """Return copies of ``images`` with the masked pixels painted black.

    Args:
        mask: sequence of masks, one per image; ``mask[k]`` belongs to
            ``images[k]``.
        images: sequence of array images. The inputs are not modified.

    Returns:
        A list with one copy per input image, in which every pixel whose mask
        value is non-zero has been set to ``(0, 0, 0)``.
    """
    black = (0, 0, 0)
    blanked = []
    for idx, picture in enumerate(images):
        canvas = picture.copy()
        hidden = mask[idx] != 0
        canvas[hidden] = black
        blanked.append(canvas)
    return blanked



def getMask(images_list):
    """Compute a binary mask for every image in ``images_list``.

    Each image is converted to grayscale, smoothed with a median blur of
    aperture 5, and thresholded with the ``THRESH_BINARY + THRESH_OTSU``
    flags.

    Args:
        images_list: sequence of colour images accepted by ``cv2.cvtColor``.

    Returns:
        A list of thresholded images, in the same order as the input.
    """
    def _threshold_one(picture):
        gray = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.medianBlur(gray, 5)
        _, binary = cv2.threshold(
            smoothed, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        return binary

    return [_threshold_one(picture) for picture in images_list]
           
def readImages():
    """Load every labelled image found under the module-level ``path``.

    For each file whose name contains ``.json``, the metadata is parsed and
    the sibling ``.jpg`` image is read and resized to 500x500. Images whose
    ``meta.clinical.benign_malignant`` field equals ``"malignant"`` go into
    the first list; all others go into the second.

    Returns:
        A ``(malignant_images, benign_images)`` tuple of lists of arrays.
    """
    malignant_images, benign_images = [], []

    for root, _dirs, names in os.walk(path):
        for name in tqdm(names):
            if '.json' not in name:
                continue

            meta_path = os.path.join(root, name)
            with open(meta_path) as handle:
                meta = json.load(handle)
                is_malignant = (
                    meta["meta"]["clinical"]["benign_malignant"] == "malignant"
                )
                # The image sits next to its metadata with a .jpg extension.
                picture = cv2.resize(
                    mpimg.imread(meta_path[:-4] + "jpg"), (500, 500)
                )

            bucket = malignant_images if is_malignant else benign_images
            bucket.append(picture)

    return malignant_images, benign_images

# Read images from the data directory

In [None]:
# Root directory that readImages() and createH5PYImages() walk for
# .json metadata files and their sibling .jpg images.
path = 'data/'
# Load all images into memory, split by their benign/malignant label.
malignant_images, benign_images = readImages()

# Experiments for getting the masks

In [None]:
def createMask(image, criteria, K, kernel):
    """Build a binary mask for ``image`` via k-means intensity clustering.

    The image is converted to grayscale and dilated twice with ``kernel``.
    Its pixel intensities are clustered into ``K`` groups with
    ``cv2.kmeans`` (10 attempts, random initial centres), each pixel is
    replaced by its cluster centre, and the result is thresholded with the
    ``THRESH_BINARY + THRESH_OTSU`` flags.

    Args:
        image: colour image accepted by ``cv2.cvtColor``.
        criteria: termination criteria tuple forwarded to ``cv2.kmeans``.
        K: number of k-means clusters.
        kernel: structuring element forwarded to ``cv2.dilate``.

    Returns:
        The thresholded image, with the same height and width as the
        grayscale version of ``image``.
    """
    # NOTE(review): the conversion assumes BGR channel order - confirm that
    # callers do not pass RGB arrays.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    dilated = cv2.dilate(gray, kernel, iterations=2)

    # cv2.kmeans is fed one float32 sample per pixel, as a single column.
    samples = np.float32(dilated.reshape((-1, 1)))
    _, labels, centers = cv2.kmeans(
        samples, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Replace every pixel by its cluster centre and restore the 2-D layout.
    palette = np.uint8(centers)
    quantized = palette[labels.flatten()].reshape((gray.shape))

    _, binary = cv2.threshold(
        quantized, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary

In [None]:
# 5x5 all-ones structuring element; createMask passes it to cv2.dilate.
kernel = np.ones((5,5), np.uint8)
# k-means stop rule: at most 10 iterations, or once the epsilon of 1.0 is reached.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
# Number of k-means clusters.
# NOTE(review): presumably lesion vs. surrounding skin - confirm.
K = 2
# NOTE(review): this initial value is never read; `i` is rebound by the
# display loop below.
i=1
# One binary mask per malignant image, in the same order as malignant_images.
result = []
for image in tqdm(malignant_images):
    result.append(createMask(image, criteria, K, kernel))

# Black out every pixel whose mask value is non-zero.
masked_images = applyMask(result, malignant_images)
# Show each masked image followed by its original for visual comparison.
for i in range(0, len(masked_images)):
    plt.imshow(masked_images[i])
    plt.show()
    plt.imshow(malignant_images[i])
    plt.show()