<a href="https://colab.research.google.com/github/petr-ngn/CNN_Real_Estate_Offices_Amenities_Listing/blob/main/03_Modelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing and importing relevant libraries

In [None]:
!pip install keras-tuner
!pip install visualkeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.1.3-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.7/135.7 KB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: kt-legacy, jedi, keras-tuner
Successfully installed jedi-0.18.2 keras-tuner-1.1.3 kt-legacy-1.0.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting visualkeras
  Downloading visualkeras-0.0.2-py3-none-any.whl (12 kB)
Collecting aggdraw>=1.3.11
  Downloading aggdraw-1.3.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (992

In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.data import AUTOTUNE
from tensorflow.keras.models import Model
from tensorflow.keras.backend import epsilon
from tensorflow.keras.optimizers import Adam
from tensorflow.math import square, maximum, reduce_mean, sqrt, reduce_sum
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, AveragePooling2D, BatchNormalization, Lambda
from keras.callbacks import EarlyStopping
import keras_tuner as kt
from google.colab import drive
import sys
import matplotlib.pyplot as plt

Mounting the Google Drive

In [None]:
#Mount Google Drive into the Colab filesystem under /content/drive
drive.mount('/content/drive')

Mounted at /content/drive


Importing a custom module created by Petr Nguyen

In [None]:
#Add the project's source folder on Drive to the import path
#(presumably where the PN_functions module imported in the next cell lives - confirm)
sys.path.append('/content/drive/MyDrive/Agile_ML/src_PN')

In [None]:
import PN_functions as PN

In [None]:
#Make the project folder on Drive the current working directory
path = '/content/drive/MyDrive/Agile_ML'
os.chdir(path)

In [None]:
#Parameter initialization
random_seed = 123

#Seed the Python, NumPy and TensorFlow random number generators so that the
#stochastic steps of the notebook (e.g. weight initialization, dropout) are repeatable
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

In [None]:
#Load the image pairs and their labels for each data split through the project helper
#NOTE(review): the return types of PN.read_pairs are not visible here - confirm in PN_functions
train_imgs, train_labels = PN.read_pairs('train')
valid_imgs, valid_labels = PN.read_pairs('valid')
test_imgs, test_labels = PN.read_pairs('test')

In [None]:
#Report how many image pairs each split contains (one line per split)
print(
    f'number of pairs in training set: {len(train_imgs)}',
    f'number of pairs in validation set: {len(valid_imgs)}',
    f'number of pairs in test set: {len(test_imgs)}',
    sep = '\n',
)

4050
1250
1250


In [None]:
#Pass each split through the project's tf.data processing pipeline
#NOTE(review): presumably returns a batched tf.data.Dataset of (image pair, label) - confirm in PN_functions
train_tf = PN.tf_data_processing_pipeline(train_imgs, train_labels)
valid_tf = PN.tf_data_processing_pipeline(valid_imgs, valid_labels)
test_tf = PN.tf_data_processing_pipeline(test_imgs, test_labels)

In [None]:
#Number of elements in each tf.data dataset
#(presumably batches rather than single pairs - confirm against the pipeline's batch size)
print('training set:', tf.data.experimental.cardinality(train_tf).numpy())
print('validation set:', tf.data.experimental.cardinality(valid_tf).numpy())
print('test set:', tf.data.experimental.cardinality(test_tf).numpy())

405
125
530
125


In [None]:
#Function for calculating the Euclidean distance between the two feature vectors
def euclidean_distance(vectors):
    """Return the Euclidean distance between two batches of feature vectors.

    Args:
        vectors: a pair (x, y) of tensors.

    Returns:
        Tensor with sqrt(sum((x - y)^2)) taken over axis 1; the reduced axis
        is kept with size 1.
    """
    first, second = vectors

    squared_diff = square(first - second)
    squared_norm = reduce_sum(squared_diff, axis = 1, keepdims = True)

    #Clamp the sum from below by the Keras epsilon so the root is never taken of a value below it
    clamped_norm = maximum(squared_norm, epsilon())

    return sqrt(clamped_norm)

In [None]:
#Function for a calculation of a contrastive loss
def contrastive_loss(margin = 1):
    """Build a contrastive loss function with the given margin.

    Args:
        margin: value a prediction has to reach before a pair labelled 1
            stops contributing to the loss (default 1).

    Returns:
        A function (y_true, y_pred) -> scalar tensor that can be passed to
        model.compile as the loss.
    """

    def contrastive__loss(y_true, y_pred):
        """Mean of (1 - y_true) * y_pred^2 + y_true * max(margin - y_pred, 0)^2."""

        #TensorFlow does not promote dtypes implicitly: bring the labels to the dtype
        #of the predictions so that e.g. integer labels do not fail in the arithmetic below
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        #Term weighted by (1 - y_true): penalizes predictions away from 0
        square_pred = tf.math.square(y_pred)
        #Term weighted by y_true: penalizes predictions that fall short of the margin
        margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_pred + (y_true) * margin_square
        )

    return contrastive__loss

In [None]:
def model_building(hp):
    """Build and compile the Siamese network for one set of hyperparameters.

    The embedding network is a stack of convolutional blocks followed by global
    average pooling, a dense layer and a dropout layer. It is applied to both
    input images; the Euclidean distance between the two embeddings is batch
    normalized and passed to a single sigmoid unit.

    Args:
        hp: KerasTuner hyperparameter container. The function reads
            'conv_blocks', 'filters_<i>', 'pooling_<i>', 'dropout_<i>',
            'Dense units', 'dropout_final' and 'learning_rate' from it.

    Returns:
        The model, compiled with Adam and the contrastive loss (margin 1).
    """
    image_shape = (224, 224, 3)

    #Input layer of the shared embedding network
    inputs = Input(shape = image_shape, name = 'input_layer')
    x = inputs

    #Tuning the number of convolutional blocks
    n_blocks = hp.Int('conv_blocks', min_value = 3, max_value = 5, default = 3)

    for i in range(n_blocks):

        #Tuning the number of convolution's output filters
        filters = hp.Int(f'filters_{i}', min_value = 32, max_value = 256, step = 32)

        #Within each block, perform 2 convolutions, each followed by batch normalization
        for _ in range(2):
            x = Conv2D(filters, kernel_size = (3, 3), padding = 'same', activation = 'relu')(x)
            x = BatchNormalization()(x)

        #Tuning the pooling type in the convolutional block
        pooling_type = hp.Choice(f'pooling_{i}', ['avg', 'max'])
        if pooling_type == 'max':
            x = MaxPooling2D(name = f'maxpooling_{i}')(x)
        else:
            x = AveragePooling2D(name = f'avgpooling_{i}')(x)

        #Tuning the dropout rate at the end of the convolutional block
        block_dropout_rate = hp.Float(f'dropout_{i}', 0, 0.5, step = 0.05, default = 0.5)
        x = Dropout(block_dropout_rate)(x)

    #Collapse the spatial dimensions by averaging each feature map
    x = GlobalAveragePooling2D(name = 'globavgpool_01')(x)
    #Flatten the output (kept from the original architecture)
    x = Flatten(name = 'flatten_layer')(x)

    #Tuning the number of units in the dense layer
    dense_units = hp.Int('Dense units', min_value = 50, max_value = 100, step = 10, default = 50)
    x = Dense(dense_units, name = 'dense_01', activation = 'relu')(x)

    #Tuning the dropout rate in the dropout layer - the final feature vector layer
    final_dropout_rate = hp.Float('dropout_final', 0, 0.5, step = 0.05, default = 0.5)
    feature_layer = Dropout(final_dropout_rate, name = 'feature_layer', seed = 123)(x)

    #Mapping the embedding model shared by both towers
    embedding_network = Model(inputs, feature_layer, name = 'SNN')

    #Input layers for the two images of a pair, both passed through the same embedding network
    left_input = Input(image_shape, name = 'left_tower')
    right_input = Input(image_shape, name = 'right_tower')
    left_features = embedding_network(left_input)
    right_features = embedding_network(right_input)

    #Euclidean distance between the two feature vectors, with further normalization
    distance = Lambda(euclidean_distance, name = 'lambda_layer')([left_features, right_features])
    normalized_distance = BatchNormalization(name = 'norm_layer')(distance)

    #Final output layer: a single sigmoid unit on the normalized distance
    output_layer = Dense(1, activation = "sigmoid", name = 'output_layer')(normalized_distance)

    #Final model mapping
    model = Model(inputs = [left_input, right_input], outputs = output_layer)

    #Model compilation, tuning the learning rate on a log scale
    learning_rate = hp.Float('learning_rate', min_value = 1e-4, max_value = 1e-3, sampling = 'log')
    model.compile(optimizer = Adam(learning_rate), loss = contrastive_loss(margin = 1))

    return model

In [None]:
#Bayesian optimization over the search space defined in model_building (10 trials).
#Trials are ranked by the validation loss: ranking by the training loss would favor
#configurations that simply fit the training pairs best (e.g. no dropout)
bayes_opt = kt.tuners.BayesianOptimization(model_building,
                                           objective = 'val_loss',
                                           max_trials = 10,
                                           seed = random_seed)

In [None]:
#Run the hyperparameter search: each trial trains for at most 10 epochs and stops
#early once the validation loss has not improved for 2 consecutive epochs
bayes_opt.search(
    train_tf,
    validation_data = valid_tf,
    epochs = 10,
    callbacks = [EarlyStopping(monitor = "val_loss", patience = 2)],
)


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
3                 |?                 |conv_blocks
256               |?                 |filters_0
avg               |?                 |pooling_0
0.3               |?                 |dropout_0
32                |?                 |filters_1
avg               |?                 |pooling_1
0.3               |?                 |dropout_1
160               |?                 |filters_2
max               |?                 |pooling_2
0.2               |?                 |dropout_2
70                |?                 |Dense units
0.25              |?                 |dropout_final
0.00046723        |?                 |learning_rate

Epoch 1/5


In [None]:
#Take the best hyperparameter set found by the tuner and show its values
best_hypers = bayes_opt.get_best_hyperparameters(num_trials=1)[0]
display(best_hypers.values)

In [None]:
#Build a new model instance from the best hyperparameters (via model_building)
final_model = bayes_opt.hypermodel.build(best_hypers)

In [None]:
#Visual Keras
#visualkeras is installed at the top of the notebook but missing from the imports cell,
#so it is imported here; without it this cell raises a NameError on a fresh kernel
import visualkeras

visualkeras.layered_view(final_model, legend=True)

In [None]:
#Keras Model Plot
#Draw the model graph with layer names, output shapes and activations, expanding the nested embedding model
tf.keras.utils.plot_model(final_model, show_shapes = True, show_layer_activations=True,
                          show_layer_names = True, expand_nested = True)

In [None]:
#Print the layer-by-layer summary of the final model
final_model.summary()

In [None]:
#Train the final model for at most 5 epochs. Training stops once the validation loss
#has not improved for 2 consecutive epochs; when that happens the weights of the best
#epoch are restored instead of keeping those of the last (worse) epoch
history = final_model.fit(train_tf,
                          epochs = 5, verbose = 1,
                          validation_data = valid_tf,
                          callbacks = [EarlyStopping(monitor = 'val_loss',
                                                     patience = 2,
                                                     restore_best_weights = True)])

In [None]:
#Plot the training history with the project helper
#(presumably training vs. validation loss per epoch, judging by the helper's name - confirm)
PN.plot_val_train_loss(history)

In [None]:
#Derive a decision threshold from the training set and from the validation set
#NOTE(review): PN.opt_threshold is not visible here; presumably it searches for the cut-off
#on the model output that best separates the labels - confirm in PN_functions
train_thres = PN.opt_threshold(final_model, train_tf, train_labels)
valid_thres = PN.opt_threshold(final_model, valid_tf, valid_labels)

In [None]:
#Show both thresholds, one per line
print(
    f'training optimal thresold {train_thres}',
    f'validation optimal thresold {valid_thres}',
    sep = '\n',
)

In [None]:
#Predict on the test set twice: once with the threshold derived from the training set (_1)
#and once with the threshold derived from the validation set (_2)
test_predictions_1 = PN.make_predictions(final_model, test_tf, train_thres)
test_predictions_2 = PN.make_predictions(final_model, test_tf, valid_thres)

In [None]:
#Score both sets of test predictions against the test labels, requesting the 'accuracy' metric
test_accuracy_1 = PN.make_evaluation(test_labels, test_predictions_1, 'accuracy')
test_accuracy_2 = PN.make_evaluation(test_labels, test_predictions_2, 'accuracy')

In [None]:
#Report the test accuracy obtained with each of the two thresholds, one per line
print(
    f'accuracy score on test set using optimal threshold from training set: {test_accuracy_1}',
    f'accuracy score on test set using optimal threshold from validation set: {test_accuracy_2}',
    sep = '\n',
)