In [None]:
import numpy as np
import json
import os
import cv2
import copy
import sklearn.metrics as metric
import pandas as pd

from tensorflow.keras import layers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint 

import matplotlib.pyplot as plt
import matplotlib

In [4]:
EPOCHS = 50
INIT_LR = 1e-3
BS = 16
IMAGE_SIZE = (256, 256)
checkpoint_path = "models/training_1_40_binary_cp/cp.ckpt"
feature_path = "models/training_1_40_binary_feature.json"
model_path = "models/training_1.h5"
history_path = "models/training_1_40_binary_history.json"
magnification = "40X"
base_dataset = "images/binary_scenario"
class_dir = ['benign', 'malignant']

In [5]:
print("[INFO] indexing file images BreaKHis dataset...")
type_dataset = ['val', 'train']
dataset_train = []
dataset_val = []
for type_set in type_dataset:
    for class_item in class_dir:
        cur_dir = os.path.join(base_dataset, type_set, magnification ,class_item)
        for file in os.listdir(cur_dir):
            if type_set == 'train':
                dataset_train.append(os.path.join(cur_dir, file))
            else:
                dataset_val.append(os.path.join(cur_dir, file))

print("[INFO] load images BreaKHis dataset...")
#  load images
train_images = []
val_images = []
for type_set in type_dataset:
    cur_dataset = dataset_train if type_set == 'train' else dataset_val
    for image_path in cur_dataset:
        if ".png" in image_path:
            image = cv2.imread(image_path)
            image = cv2.resize(image, IMAGE_SIZE)
            if type_set == 'train':
                train_images.append(image)
            else:
                val_images.append(image)

[INFO] indexing file images BreaKHis dataset...
[INFO] load images BreaKHis dataset...


In [6]:
# normalization
print("[INFO] normalization...")
train_x = np.array(train_images).astype("float32") / 255.0
val_x = np.array(val_images).astype("float32") / 255.0

[INFO] normalization...


In [7]:
class ConvAutoEncoder:
    """
    Core Convolutional AutoEncoder
    This class included with build method you can adjust of width height and depth of image
    default filters 128, and latent dimension 48
    """
    @staticmethod
    def build(width, height, depth, filters=(128,), latent_dim=48):
        input_shape = (height, width, depth)
        channel_dim = -1
        inputs = layers.Input(shape=input_shape)
        x = inputs
        # Encoder layer
        for f in filters:
            x = layers.Conv2D(f, (3, 3), strides=2, padding='same')(x)
            x = layers.LeakyReLU(alpha=0.2)(x)
            x = layers.BatchNormalization(axis=channel_dim, name='enc_filter_' + str(f))(x)
        volume_size = K.int_shape(x)
        x = layers.Flatten()(x)
        # Latent layer
        latent = layers.Dense(latent_dim, name="encoded")(x)
        # Decoder layer
        x = layers.Dense(np.prod(volume_size[1:]))(latent)
        x = layers.Reshape((volume_size[1], volume_size[2], volume_size[3]))(x)
        # Reverse on decoder
        for f in filters[::-1]:
            x = layers.Conv2DTranspose(f, (3, 3), strides=2, padding='same')(x)
            x = layers.LeakyReLU(alpha=0.2)(x)
            x = layers.BatchNormalization(axis=channel_dim, name='dec_filter_' + str(f))(x)
        x = layers.Conv2DTranspose(depth, (3, 3), padding="same")(x)
        outputs = layers.Activation("sigmoid", name="decoded")(x)
        auto_encoder = Model(inputs, outputs, name="auto_encoder")
        return auto_encoder

In [8]:
auto_encoder = load_model(model_path, compile=False)
# load our auto_encoder from disk
print("[INFO] loading auto encoder model...")
auto_encoder.load_weights(model_path)

# create the encoder model which consists of *just* the encoder
# portion of the auto encoder
encoder = Model(inputs=auto_encoder.input,
	outputs=auto_encoder.get_layer("encoded").output)

# quantify the contents of our input images using the encoder
print("[INFO] encoding images...")
features = encoder.predict(train_x)



[INFO] loading auto encoder model...
[INFO] encoding images...
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 261ms/step


In [9]:

indexes = list(range(0, train_x.shape[0]))
features_array = [[float(x) for x in y] for y in features]
labels = [path.split("\\")[3] for path in dataset_train]
data = {"indexes": indexes, "features": features_array, "locations": dataset_train, "labels":labels}
     

In [10]:
def euclidean(a, b):
	# compute and return the euclidean distance between two vectors
	return np.linalg.norm(a - b)

In [None]:
def perform_search(query_features, indexed_train, max_results=5):
	retrieved = []
	for idx in range(0, len(indexed_train["features"])):
		distance = euclidean(query_features, indexed_train["features"][idx])
		retrieved.append((distance, idx))
	retrieved = sorted(retrieved)[:max_results]
	return retrieved

In [35]:
print("[INFO] indexing file images BreaKHis dataset...")
# indexing file images
dataset = []
for class_item in class_dir:
    cur_dir = os.path.join(base_dataset, 'test', magnification, class_item)
    for file in os.listdir(cur_dir):
        dataset.append(os.path.join(cur_dir, file))

print("test len to retrieving:", len(dataset))
print("[INFO] load test images BreaKHis dataset...")
#  load images
images = []
for image_path in dataset:
    if ".png" in image_path:
        image = cv2.imread(image_path)
        image = cv2.resize(image, IMAGE_SIZE)
        images.append(image)

[INFO] indexing file images BreaKHis dataset...
test len to retrieving: 199
[INFO] load test images BreaKHis dataset...


In [36]:
# normalization
print("[INFO] normalization...")
test_x = np.array(images).astype("float32") / 255.0

[INFO] normalization...


In [37]:
from collections import Counter

def most_common(arr):
    print(arr)
    print("-"*100)
    return Counter(arr).most_common(1)[0][0]

In [39]:
with open(feature_path) as f:
  training_indexed = json.load(f)

query_indexes = list(range(0, test_x.shape[0]))
label_builder = list(np.unique(training_indexed["labels"]))
class_builder = {label_unique:[] for label_unique in label_builder}
recalls = copy.deepcopy(class_builder)
precisions = copy.deepcopy(class_builder)
# loop over the testing indexes
for i in query_indexes:
    queryFeatures = features[i]
    results = perform_search(queryFeatures, training_indexed, max_results=5)
    # Las etiquetas de los más cercanos
    labels_ret = [training_indexed["labels"][r[1]] for r in results]
    print(most_common(labels_ret))
    # label_true = dataset[i].split("\\")[3]
    # label_trues = [label_true for _ in labels_ret]
    # recall = metric.recall_score(label_trues, labels_ret, average='weighted')
    # precision = metric.precision_score(label_trues, labels_ret, average='weighted')
    # recalls[label_true].append(recall)
    # precisions[label_true].append(precision)

[(np.float64(3517.63174203032), 986), (np.float64(3517.63174203032), 1093), (np.float64(3535.358604168788), 1309), (np.float64(3568.9960607042667), 957), (np.float64(3591.713578598242), 1254)]
['malignant', 'malignant', 'malignant', 'malignant', 'malignant']
----------------------------------------------------------------------------------------------------
malignant
[(np.float64(3651.8569780147836), 986), (np.float64(3651.8569780147836), 1093), (np.float64(3729.838885484723), 957), (np.float64(3772.806048286515), 1262), (np.float64(3789.216120594741), 67)]
['malignant', 'malignant', 'malignant', 'malignant', 'benign']
----------------------------------------------------------------------------------------------------
malignant
[(np.float64(3750.555374780561), 986), (np.float64(3750.555374780561), 1093), (np.float64(3868.9364606983463), 957), (np.float64(3886.89031951345), 1309), (np.float64(3903.3405300853447), 991)]
['malignant', 'malignant', 'malignant', 'malignant', 'malignant']
--

In [15]:
print("recall values:")
comb_recall, comb_precision = [], []
for key in recalls.keys():
    average_val = np.average(recalls[key])
    print(key, average_val)
    comb_recall.append(average_val)
print("combined recall", np.average(comb_recall))

print("\nprecision values:")
for key in precisions.keys():
    average_val = np.average(precisions[key])
    print(key, average_val)
    comb_precision.append(average_val)
print("combined precision", np.average(comb_precision))

recall values:
benign 0.05161290322580645
malignant 0.9576642335766422
combined recall 0.5046385684012243

precision values:
benign 0.25806451612903225
malignant 1.0
combined precision 0.6290322580645161
