## Depedency Library

In [1]:
import numpy as np
import json
import os
import glob
import cv2
import copy
import sklearn.metrics as metric
from sklearn.model_selection import train_test_split
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint 

import matplotlib.pyplot as plt
import matplotlib

## Definition of hyperparameter


In [2]:
EPOCHS = 20
INIT_LR = 1e-3
BS = 16
IMAGE_SIZE = (256, 256)

magnification = "400X"
class_type = "binary" #multiclass
if class_type == "binary":
    class_dir = ['benign', 'malignant']
else:
    class_dir = ['tubular_adenoma', 'phyllodes_tumor', 'papillary_carcinoma',
              'mucinous_carcinoma', 'lobular_carcinoma', 'fibroadenoma',
              'ductal_carcinoma', 'adenosis']
    
checkpoint_path = "training_1_40_binary_cp/cp.ckpt"
feature_path = "training_1_40_binary_feature.json"
model_path = "training_1_40_binary_model.h5"
history_path = "training_1_40_binary_history.json"

In [3]:
paths, labels = [], []

# /kaggle/input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/benign/SOB/adenosis/SOB_B_A_14-22549AB/100X/SOB_B_A-14-22549AB-100-001.png

if class_type == "binary":
    for class_ in class_dir:
        ls = glob.glob(f"/kaggle/input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/{class_}/SOB/*/*/{magnification}/*")
        paths.extend(ls)
        labels.extend([class_]*len(ls))
else:
    for class_ in class_dir:
        ls = glob.glob(f"/kaggle/input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast/*/SOB/{class_}/*/{magnification}/*")
        paths.extend(ls)
        labels.extend([class_]*len(ls))

## Data Split

In [4]:
def get_array(paths):
    images = []
    for path in paths:
        image = cv2.imread(path)
        image = cv2.resize(image, IMAGE_SIZE)
        images.append(image)
    images = np.array(images).astype("float32") / 255.0
    return images

In [5]:
train_paths, val_paths, train_labels, val_labels = train_test_split(paths, labels, test_size=0.2, stratify=labels)
val_paths, test_paths, val_labels, test_labels = train_test_split(val_paths, val_labels, test_size=0.5, stratify=val_labels)

train_images = get_array(train_paths)
val_images = get_array(val_paths)
test_images = get_array(test_paths)

## Model

In [6]:
class ConvAutoEncoder:
    """
    Core Convolutional AutoEncoder
    This class included with build method you can adjust of width height and depth of image
    default filters 128, and latent dimension 48
    """
    @staticmethod
    def build(width, height, depth, filters=(128,), latent_dim=48):
        input_shape = (height, width, depth)
        channel_dim = -1
        inputs = layers.Input(shape=input_shape)
        x = inputs
        # Encoder layer
        for f in filters:
            x = layers.Conv2D(f, (3, 3), strides=2, padding='same')(x)
            x = layers.LeakyReLU(alpha=0.2)(x)
            x = layers.BatchNormalization(axis=channel_dim, name='enc_filter_' + str(f))(x)
        volume_size = K.int_shape(x)
        x = layers.Flatten()(x)
        # Latent layer
        latent = layers.Dense(latent_dim, name="encoded")(x)
        # Decoder layer
        x = layers.Dense(np.prod(volume_size[1:]))(latent)
        x = layers.Reshape((volume_size[1], volume_size[2], volume_size[3]))(x)
        # Reverse on decoder
        for f in filters[::-1]:
            x = layers.Conv2DTranspose(f, (3, 3), strides=2, padding='same')(x)
            x = layers.LeakyReLU(alpha=0.2)(x)
            x = layers.BatchNormalization(axis=channel_dim, name='dec_filter_' + str(f))(x)
        x = layers.Conv2DTranspose(depth, (3, 3), padding="same")(x)
        outputs = layers.Activation("sigmoid", name="decoded")(x)
        auto_encoder = Model(inputs, outputs, name="auto_encoder")
        return auto_encoder

## Buiding Model based ConvAutoEncoder Class

In [7]:
print("[INFO] building auto encoder...")
auto_encoder = ConvAutoEncoder.build(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
opt = Adam(learning_rate=INIT_LR)
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    monitor="val_loss",
    verbose = 1,
    mode='min',
    save_best_only=True)
auto_encoder.compile(loss="mse", optimizer=opt)
auto_encoder.summary()

[INFO] building auto encoder...
Model: "auto_encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 128, 128, 128)     3584      
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 128, 128, 128)     0         
                                                                 
 enc_filter_128 (BatchNormal  (None, 128, 128, 128)    512       
 ization)                                                        
                                                                 
 flatten (Flatten)           (None, 2097152)           0         
                                                                 
 encoded (Dense)             (None, 48)                100663344 
                      

## Training The Model

In [8]:
# train the convolutional auto encoder
print("[INFO] training auto encoder...")

with tf.device("/GPU:0"):
    H = auto_encoder.fit(
        train_images, train_images,
        shuffle=True,
        validation_data=(val_images, val_images),
        epochs=EPOCHS,
        callbacks = [checkpoint_callback], batch_size=BS)

[INFO] training auto encoder...
Epoch 1/20
Epoch 1: val_loss improved from inf to 0.02464, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 2/20
Epoch 2: val_loss improved from 0.02464 to 0.02237, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 3/20
Epoch 3: val_loss improved from 0.02237 to 0.02142, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 4/20
Epoch 4: val_loss improved from 0.02142 to 0.02082, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 5/20
Epoch 5: val_loss did not improve from 0.02082
Epoch 6/20
Epoch 6: val_loss did not improve from 0.02082
Epoch 7/20
Epoch 7: val_loss improved from 0.02082 to 0.01539, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 8/20
Epoch 8: val_loss improved from 0.01539 to 0.01453, saving model to training_1_40_binary_cp/cp.ckpt
Epoch 9/20
Epoch 9: val_loss did not improve from 0.01453
Epoch 10/20
Epoch 10: val_loss did not improve from 0.01453
Epoch 11/20
Epoch 11: val_loss did not improve from 0.01453
Epoch 12/20


## Saved Result Graphic (.json) and Model Trained (h5)

In [9]:
with open(history_path, 'w') as f:
    json.dump(H.history, f)
auto_encoder.save(model_path)

# Extract Feature Stage

## Load already trained model and extracting feature


In [10]:
auto_encoder = ConvAutoEncoder.build(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
# load our auto_encoder from disk
print("[INFO] loading auto encoder model...")
auto_encoder.load_weights(checkpoint_path)

# create the encoder model which consists of *just* the encoder
# portion of the auto encoder
encoder = Model(inputs=auto_encoder.input, outputs=auto_encoder.get_layer("encoded").output)

# quantify the contents of our input images using the encoder
print("[INFO] encoding images...")
features = encoder.predict(train_images)

[INFO] loading auto encoder model...
[INFO] encoding images...


## Create mapping feature included location images, label, feature and indexing number

In [11]:
train_indexes = list(range(0, train_images.shape[0]))
train_features_array = [[float(x) for x in y] for y in features]
data = {"indexes": train_indexes, "features": train_features_array, "locations": train_paths, "labels":train_labels}

## Saved model feature (already mapped) as json

In [12]:
with open(feature_path, 'w') as f:
    json.dump(data, f)

# Retrieval Test Stage

## Euclidean function

In [13]:
def euclidean(a, b):
    # compute and return the euclidean distance between two vectors
    return np.linalg.norm(a - b)

## Perform search function
```
Default max result from search is 5
```




In [14]:
def perform_search(query_features, indexed_train, max_results=5):
    retrieved = []
    for idx in range(0, len(indexed_train["features"])):
        distance = euclidean(query_features, indexed_train["features"][idx])
        retrieved.append((distance, idx))
    retrieved = sorted(retrieved)[:max_results]
    return retrieved

## Load the model and feature already extracted

In [15]:
auto_encoder = ConvAutoEncoder.build(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
# load our auto_encoder from disk
print("[INFO] loading auto encoder model...")
auto_encoder.load_weights(checkpoint_path)
with open(feature_path) as f:
    training_indexed = json.load(f)

# create the encoder model which consists of *just* the encoder
# portion of the auto encoder
encoder = Model(inputs=auto_encoder.input, outputs=auto_encoder.get_layer("encoded").output)

# quantify the contents of our input images using the encoder
print("[INFO] encoding images...")
features_retrieved = encoder.predict(test_images)

[INFO] loading auto encoder model...
[INFO] encoding images...


## Perform search and retrieval based test images

In [16]:
query_indexes = list(range(0, test_images.shape[0]))
class_builder = {label_unique:[] for label_unique in labels}
recalls = copy.deepcopy(class_builder)
precisions = copy.deepcopy(class_builder)
# loop over the testing indexes
for i in query_indexes:
    queryFeatures = features_retrieved[i]
    results = perform_search(queryFeatures, training_indexed, max_results=5)
    labels_ret = [training_indexed["labels"][r[1]] for r in results]
    label_true = test_labels[i]
    label_trues = [label_true for _ in labels_ret]
    recall = metric.recall_score(label_trues, labels_ret, average='weighted')
    precision = metric.precision_score(label_trues, labels_ret, average='weighted')
    recalls[label_true].append(recall)
    precisions[label_true].append(precision)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

## Print the recall and precision values

In [17]:
print("recall values:")
comb_recall, comb_precision = [], []
for key in recalls.keys():
    average_val = np.average(recalls[key])
    print(key, average_val)
    comb_recall.append(average_val)
print("combined recall", np.average(comb_recall))

print("\nprecision values:")
for key in precisions.keys():
    average_val = np.average(precisions[key])
    print(key, average_val)
    comb_precision.append(average_val)
print("combined precision", np.average(comb_precision))

recall values:
benign 0.7389830508474575
malignant 0.8504065040650406
combined recall 0.7946947774562491

precision values:
benign 0.9661016949152542
malignant 0.983739837398374
combined precision 0.9749207661568141
