In [None]:
# hci4ai
# Saves two pickle files to disk, each containing 500 embeddings.
# The embeddings correspond to the images in /images.
# Each image's embedding is a float array of size 512: the output of the model layer named "dense".

In [1]:
import re
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

###############################################################################

from google.colab import drive
# Mount Google Drive inside the Colab VM at /content/drive.
drive.mount('/content/drive')
# Switch the working directory to the project folder on Drive; the relative
# model path below is resolved against this directory.
%cd '/content/drive/My Drive/marco_colab_data/hci4ai'

# Load the saved Keras model, identified by its name relative to the
# current working directory.
model_name = 'model__2023-04-26__22-42-58'
model = tf.keras.models.load_model(model_name)

###############################################################################

import matplotlib.cm as cm

def get_img_array(img_path, size):
    """Read an image file and return it as an array with a leading batch axis.

    Args:
        img_path: Path of the image file to load.
        size: Target size forwarded to ``load_img`` as ``target_size``.

    Returns:
        The array produced by ``img_to_array`` with one extra axis of
        length 1 inserted at position 0.
    """
    keras_image = tf.keras.preprocessing.image
    loaded = keras_image.load_img(img_path, target_size=size)
    pixels = keras_image.img_to_array(loaded)
    # Indexing with np.newaxis prepends the batch dimension.
    return pixels[np.newaxis, ...]

def make_embedding(img_array, model, last_conv_layer_name, pred_index=None):
    """Return the output of one named layer of ``model`` for ``img_array``.

    Only a forward pass up to the requested layer is required, so no
    gradient tape is opened and the model's final prediction is not
    computed.

    Args:
        img_array: Input batch fed to the model. The caller in this notebook
            passes a single image with a leading batch axis of length 1.
        model: Keras model that contains a layer named
            ``last_conv_layer_name``.
        last_conv_layer_name: Name of the layer whose output is returned.
            Despite the parameter name, any layer works; this notebook
            passes ``"dense"``.
        pred_index: Unused. Kept only so existing callers that pass it
            keep working.

    Returns:
        NumPy array with the layer output after ``tf.squeeze`` has removed
        every axis of length 1 (including the batch axis for one image).
    """
    # Sub-model that shares the original weights but stops at the requested
    # layer, so nothing past it is evaluated.
    embedding_model = tf.keras.models.Model(
        model.inputs, model.get_layer(last_conv_layer_name).output
    )

    # Run in inference mode explicitly so layers such as dropout or batch
    # normalisation behave deterministically.
    layer_output = embedding_model(img_array, training=False)

    return tf.squeeze(layer_output).numpy()

###############################################################################

def get_index_from_filename(filename):
  """Extract the trailing integer index from an image file name.

  The index is the text between the last underscore and the file
  extension, e.g. ``"p_12.jpg"`` -> ``12``. Any extension (or none) is
  accepted, regardless of case.

  Args:
    filename: File name or path whose stem ends in ``_<integer>``.

  Returns:
    The index as an ``int``.

  Raises:
    ValueError: If the part after the last underscore is not an integer.
  """
  # splitext removes whatever extension is present instead of only ".jpg".
  stem, _extension = os.path.splitext(os.path.basename(filename))
  index_text = stem.rsplit("_", 1)[-1]
  try:
    return int(index_text)
  except ValueError:
    raise ValueError(
        "cannot extract an integer index from file name %r" % (filename,)
    ) from None

###############################################################################

IMG_SIZE = (180, 180)

%cd '/content/drive/My Drive/marco_colab_data/hci4ai/export'

path = '/content/drive/My Drive/marco_colab_data/hci4ai/export'
files = (file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file)))

last_conv_layer_name = "dense"

embeddings_p = np.zeros([500,512])
embeddings_n = np.zeros([500,512])

for filename in files:
  index = get_index_from_filename(filename)
  img_array = get_img_array(filename, size=IMG_SIZE)
  embedding = make_embedding(img_array, model, last_conv_layer_name)
  if 'p_' in filename:
    embeddings_p[index] = embedding
  else:
    embeddings_n[index] = embedding

###############################################################################

import pickle

# Write each embedding array to its own pickle file, relative to the current
# working directory.
for pkl_path, embeddings_to_save in (
    ('embeddings/embeddings_p.pkl', embeddings_p),
    ('embeddings/embeddings_n.pkl', embeddings_n),
):
    # Create the output folder if needed, so a missing directory does not
    # discard the embeddings that were just computed.
    os.makedirs(os.path.dirname(pkl_path), exist_ok=True)
    with open(pkl_path, 'wb') as outp:
        pickle.dump(embeddings_to_save, outp, pickle.HIGHEST_PROTOCOL)

###############################################################################

# Flush and unmount the Drive mount before announcing completion, so the
# files written above are expected to be visible in Drive afterwards.
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

Mounted at /content/drive
/content/drive/My Drive/marco_colab_data/hci4ai
/content/drive/My Drive/marco_colab_data/hci4ai/export
All changes made in this colab session should now be visible in Drive.
