# Step 1: Feature extraction from all the images

In [7]:
import numpy as np
from numpy.linalg import norm
import seaborn as sns
import os
from tqdm.notebook import tqdm
import pickle

In [2]:
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input


## Creating the base ResNet50 Model

In [3]:
def _build_feature_model():
    """Return a frozen ImageNet ResNet50 backbone followed by global max pooling."""
    # include_top=False drops the ImageNet classification head, so the network
    # ends at its last convolutional feature map.
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # The pretrained weights are only used for inference here, so freeze them.
    backbone.trainable = False

    # Pooling over the spatial axes turns the feature map into one vector per image.
    return tensorflow.keras.Sequential([
        backbone,
        GlobalMaxPooling2D()
    ])


model = _build_feature_model()

## Feature Extractor function

In [8]:
#feature extraction function
def feature_extractor(image_path, my_model) -> np.ndarray:
    """Extract an L2-normalised feature vector for a single image.

    Parameters
    ----------
    image_path : path of the image file to load.
    my_model : object exposing ``predict(batch)``; it receives a batch that
        contains exactly one preprocessed 224x224 image.

    Returns
    -------
    numpy.ndarray
        The flattened model output scaled to unit Euclidean length. If the
        model output is all zeros it is returned unscaled, because dividing
        by a zero norm would yield a vector of NaNs.
    """
    img = image.load_img(image_path, target_size=(224, 224))  # load and resize
    img_array = image.img_to_array(img)  # image -> numpy array
    # The model works on batches, so add a leading batch axis of size 1.
    img_batch = np.expand_dims(img_array, axis=0)
    preprocessed_batch = preprocess_input(img_batch)  # ResNet50 input preprocessing
    features = my_model.predict(preprocessed_batch).flatten()  # 1-D feature vector

    # L2 normalisation: x / sqrt(sum(x_i ** 2)).
    magnitude = norm(features)
    if magnitude == 0:
        # An all-zero vector has no direction; keep it as zeros rather than NaN.
        return features
    return features / magnitude


In [9]:

datasets_directory = "../Datasets/images/"

# os.listdir returns entries in arbitrary order. Sorting keeps the index
# reproducible across runs and machines. features.pkl is aligned with this
# list by position, so regenerate it whenever this list is rebuilt.
filenames = [
    os.path.join(datasets_directory, entry)
    for entry in sorted(os.listdir(datasets_directory))
]

# The context manager guarantees the pickle is flushed and the handle closed.
with open('filenames.pkl', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

In [2]:
# filenames[0:5]

## Dataset Feature Extraction

In [None]:
# One feature vector per entry of `filenames`, in the same order, so that
# feature_list[i] describes filenames[i]. Overall shape: dataset_size x 2048.
feature_list = []

for image_path in tqdm(filenames):
    feature_list.append(feature_extractor(image_path, model))

# The context manager guarantees the pickle is flushed and the handle closed.
with open('features.pkl', 'wb') as features_file:
    pickle.dump(feature_list, features_file)
