Step 1: Imports

In [11]:
# Step 1 – Imports

# Import the 'os' module to interact with the operating system (e.g., list files in a folder)
import os

# Import the InceptionV3 model and its preprocessing function from Keras applications
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

# Import utility functions to load an image file and convert it to a NumPy array
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Import the Model class to create a custom model that outputs features instead of class predictions
from tensorflow.keras.models import Model

# Import the 'pickle' module to save and load Python objects (like dictionaries) to/from a file
import pickle


Step 2: Load the InceptionV3 model

In [12]:
# Step 2 – Load the InceptionV3 model

# Instantiate InceptionV3 with its pre-trained ImageNet weights.
# include_top=True (the Keras default, spelled out here) keeps the final
# classification layers as part of the loaded network.
base_model = InceptionV3(include_top=True, weights='imagenet')

print("InceptionV3 base model loaded.")

InceptionV3 base model loaded.


Step 3: Restructure the model

In [13]:
# Step 3 – Restructure the model

# Pick the layer that sits immediately before the final prediction layer
penultimate_layer = base_model.layers[-2]

# Wrap the original network in a new Model that shares InceptionV3's input
# but stops at the penultimate layer, so it returns that layer's output
# instead of the class predictions
feature_extractor_model = Model(
    inputs=base_model.inputs,
    outputs=penultimate_layer.output,
)

print("Feature extractor model created (InceptionV3, second-to-last layer).")


Feature extractor model created (InceptionV3, second-to-last layer).


Step 4: Show model summary

In [14]:
# Step 4 – Show model summary

# Display the architecture of the feature extractor to verify its layers and output shapes.
# Model.summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would append a stray "None" line after the table.
feature_extractor_model.summary()


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 299, 299, 3)]        0         []                            
                                                                                                  
 conv2d_94 (Conv2D)          (None, 149, 149, 32)         864       ['input_2[0][0]']             
                                                                                                  
 batch_normalization_94 (Ba  (None, 149, 149, 32)         96        ['conv2d_94[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 activation_94 (Activation)  (None, 149, 149, 32)         0         ['batch_normalization_94

Step 5: Prepare feature storage and image directory

In [15]:
# Step 5 – Prepare feature storage and image directory

# Folder that holds the input images. No absolute path is given, so it is
# resolved relative to the notebook's current working directory.
directory = r'test B- front masked images - side'

# Empty mapping that the extraction loop fills in:
# key = image path, value = feature vector extracted for that image
features = {}

print(f"Using image directory: {directory}")

Using image directory: test B- front masked images - side


Step 6: Loop through images, preprocess, and extract features

In [16]:
# Step 6 – Loop through images, preprocess, and extract features

# Get a list of all entries in the directory and count them once up front
file_list = os.listdir(directory)
total_files = len(file_list)

# Print how many files were found before starting the loop
print(f"Found {total_files} files in directory: {directory}")

# (file name, error message) for every entry that could not be processed.
# Failures are still reported inline, but are also collected here so they can be
# summarised after the loop instead of being lost among the progress lines.
skipped_files = []

# Loop through every file name in the specified image directory
for idx, image_name in enumerate(file_list, start=1):
    # Build the full path to the current image file by joining directory and file name
    image_path = os.path.join(directory, image_name)

    try:
        # Load the image from disk and resize it to 299x299 pixels (required size for InceptionV3)
        img = load_img(image_path, target_size=(299, 299))

        # Convert the loaded PIL image to a NumPy array (height, width, channels)
        img = img_to_array(img)

        # Add a leading batch dimension: (height, width, channels) -> (1, height, width, channels)
        img = img.reshape((1, *img.shape))

        # Apply InceptionV3's own input preprocessing
        img = preprocess_input(img)

        # Run the image through the feature extractor; verbose=0 disables the progress bar
        feature_vector = feature_extractor_model.predict(img, verbose=0)
    except Exception as e:
        # Best-effort: report the problem, remember it for the summary, and move on
        skipped_files.append((image_name, str(e)))
        print(f"Skipping {image_name} due to error: {e}")
        continue

    # Store the extracted feature vector in the dictionary using the image path as the key
    features[image_path] = feature_vector

    # Print progress information for this image
    print(f"Processed {idx}/{total_files}: {image_name}")

# Final summary so that skipped entries cannot go unnoticed before the features are saved
print(f"Done: {total_files - len(skipped_files)} processed, {len(skipped_files)} skipped.")
for skipped_name, reason in skipped_files:
    print(f"  - {skipped_name}: {reason}")


Found 1160 files in directory: test B- front masked images - side
Processed 1/1160: 5e09e1b0d43b6c430709a513f594c591.png
Processed 2/1160: 7e6a57e2fcabf518e9007a77d4cc4960.png
Processed 3/1160: 4ee8251ae7e4ad42c75644db390fc5c5.png
Processed 4/1160: 6367d327f96b951e4a498c88d615e8a4.png
Processed 5/1160: 8b16daea6768b92fb27997f76215aa5f.png
Processed 6/1160: 673f0b3ea8d9ffb4e0ed72f564361801.png
Processed 7/1160: 9164d1ff716da838b1371b12455278b1.png
Processed 8/1160: f262404c5d1967612b5b3096855802f0.png
Processed 9/1160: b5eda6d46ad53a794faa4239e20b737e.png
Processed 10/1160: 8096491c2df613c6f566a9be8778702b.png
Processed 11/1160: 5ba57ea310b3a5e0967c20af47163e4c.png
Processed 12/1160: 869e24d80a0ab6ff0846a3ef036022b5.png
Processed 13/1160: 776d8b27b2b64f56dd83a5a53797dcba.png
Processed 14/1160: c19eedb537e0f84217e19351cc5addc7.png
Processed 15/1160: 0870d8675684285a4002a258a839bc9e.png
Processed 16/1160: 266e72a20e7e0f4c0119bbad4825887a.png
Processed 17/1160: a6c0c60de62df86f75565357dcb8

Step 7: Save the dictionary as a pickle file

In [17]:
# Step 7 – Save the dictionary as a pickle file

# Destination file for the serialized features dictionary
features_pickle_path = 'testB - inceptionv3_side_masked_features.pkl'

# Serialize the 'features' dictionary to disk.
# Pickle data is bytes, hence the binary write mode ('wb').
with open(features_pickle_path, 'wb') as pickle_file:
    pickle.dump(features, pickle_file)

# Confirm where the features were written
print(f"Features dictionary saved to: {features_pickle_path}")


Features dictionary saved to: testB - inceptionv3_side_masked_features.pkl


In [18]:
# Step 7b – Rename dictionary keys to file names and overwrite pickle
#
# Relies on 'os' and 'pickle' from the Step 1 imports cell and on the 'features'
# dictionary filled in Step 6, so it must be run after those cells.

# Build a new dictionary keyed by file name only (e.g., "001f3a8850....png")
# instead of the full path
features_renamed = {
    os.path.basename(full_path): feature_vector
    for full_path, feature_vector in features.items()
}

# Two different paths that share a base name would collapse into a single entry.
# Stop here rather than silently dropping feature vectors.
if len(features_renamed) != len(features):
    raise ValueError(
        f"Renaming keys to file names would merge entries: "
        f"{len(features)} paths map to only {len(features_renamed)} file names."
    )

# Replace the old dictionary with the new one (keys now are file names only)
features = features_renamed

# Optional: print the first 5 keys to verify the change
print("Sample keys after rename:")
for k in list(features)[:5]:
    print(k)

# Define the pickle file name (same file that Step 7 wrote, so it is overwritten)
pickle_path = 'testB - inceptionv3_side_masked_features.pkl'

# Open the pickle file in binary write mode to overwrite it with the updated dictionary
with open(pickle_path, 'wb') as f:
    # Save (serialize) the updated 'features' dictionary into the pickle file
    pickle.dump(features, f)

# Print a confirmation message showing how many entries were saved and where
print(f"Updated features dictionary with {len(features)} entries saved to: {pickle_path}")


Sample keys after rename:
5e09e1b0d43b6c430709a513f594c591.png
7e6a57e2fcabf518e9007a77d4cc4960.png
4ee8251ae7e4ad42c75644db390fc5c5.png
6367d327f96b951e4a498c88d615e8a4.png
8b16daea6768b92fb27997f76215aa5f.png
Updated features dictionary with 1160 entries saved to: testB - inceptionv3_side_masked_features.pkl


Step 8: Compare file names and dictionary keys

In [19]:
# Step 8 – Compare file names and dictionary keys

import os  # Filesystem helpers: list entries, join paths, split extensions

# ---------------------------------------------
# 1. Collect image file names from the directory
# ---------------------------------------------

# File extensions that count as images
valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}

# Keep only directory entries that are regular files (not subfolders) and whose
# extension, compared case-insensitively, is one of the valid image extensions
image_files_in_dir = [
    name
    for name in os.listdir(directory)
    if os.path.isfile(os.path.join(directory, name))
    and os.path.splitext(name)[1].lower() in valid_extensions
]

# Work with sets so the two collections can be compared with set difference
files_set = set(image_files_in_dir)   # File names found in the folder
keys_set = set(features.keys())       # Keys of the features dictionary

# ---------------------------------------------
# 2. Compare sets
# ---------------------------------------------

# Present in the folder, missing from the dictionary
files_not_in_dict = files_set - keys_set

# Present in the dictionary, missing from the folder
keys_not_in_folder = keys_set - files_set

# ---------------------------------------------
# 3. Print summary
# ---------------------------------------------

print(f"Number of image files in directory: {len(files_set)}")
print(f"Number of keys in features dictionary: {len(keys_set)}\n")

if files_not_in_dict or keys_not_in_folder:
    print("⚠ Some mismatches were found:\n")

    # Files in folder but not in dictionary (at most 10 examples, sorted)
    if files_not_in_dict:
        print(f"Files in directory but NOT in dictionary ({len(files_not_in_dict)}):")
        for name in sorted(files_not_in_dict)[:10]:
            print("  -", name)
        print()

    # Keys in dictionary but not in folder (at most 10 examples, sorted)
    if keys_not_in_folder:
        print(f"Keys in dictionary but NO corresponding file in directory ({len(keys_not_in_folder)}):")
        for name in sorted(keys_not_in_folder)[:10]:
            print("  -", name)
else:
    # Both differences are empty: the two sets are identical
    print("✅ All image filenames and dictionary keys MATCH exactly.")


Number of image files in directory: 1160
Number of keys in features dictionary: 1160

✅ All image filenames and dictionary keys MATCH exactly.
