1. Load the EfficientNetB7 model
2. Restructure the model by removing the final classification layer
3. Set up the feature storage and the image directory
4. Loop through every file name in the image directory
5. Resize the image to 600x600 pixels, as required by the model
6. Preprocess the image according to the requirements of the model
7. Pass the image through the restructured model
8. Store the extracted features in a dictionary

Step 1 – Imports

In [4]:
# Import the 'os' module to interact with the operating system (e.g., list files in a folder)
import os

# Import the EfficientNetB7 model and its preprocessing function from Keras applications
from tensorflow.keras.applications.efficientnet import EfficientNetB7, preprocess_input

# Import utility functions to load an image file and convert it to a NumPy array
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Import the Model class to create a custom model that outputs features instead of class predictions
from tensorflow.keras.models import Model

# Import the 'pickle' module to save and load Python objects (like dictionaries) to/from a file
import pickle


Step 2 - Load the EfficientNet-B7 model

In [5]:
# Instantiate EfficientNet-B7 with its ImageNet-pretrained weights.
# include_top=True is the Keras default; it is spelled out here because the
# classification head must be present for the next step, which taps the
# layer sitting just before the final prediction layer.
base_model = EfficientNetB7(include_top=True, weights='imagenet')

Step 3 - Restructure the model

In [6]:
# Build the feature extractor: it shares EfficientNet-B7's input but stops one
# layer short of the end, so its output is what would otherwise be fed to the
# final classification layer.
penultimate_layer = base_model.layers[-2]  # second-to-last layer of the full model

feature_extractor_model = Model(
    inputs=base_model.inputs,
    outputs=penultimate_layer.output,
)

Step 4 - Show model summary

In [7]:
# Show the architecture of the feature extractor to verify its layers and output shapes.
# Model.summary() prints the table itself and returns None, so it must not be
# wrapped in print(): doing so appends a stray "None" line after the table.
feature_extractor_model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 600, 600, 3)]        0         []                            
                                                                                                  
 rescaling_1 (Rescaling)     (None, 600, 600, 3)          0         ['input_2[0][0]']             
                                                                                                  
 normalization_1 (Normaliza  (None, 600, 600, 3)          7         ['rescaling_1[0][0]']         
 tion)                                                                                            
                                                                                                  
 tf.math.multiply_1 (TFOpLa  (None, 600, 600, 3)          0         ['normalization_1[0][0]'] 

Step 5 - Prepare feature storage and image directory

In [8]:
# Folder that contains the input images ("front masked images").
# NOTE: this is a relative path, so it is resolved against the current
# working directory of the notebook/kernel.
directory = r'front masked images'

# Storage for the extracted features: one entry per image, filled by the
# extraction loop (key = image path, value = feature vector)
features = dict()


Step 6 - Loop through images, preprocess, and extract features

In [9]:
# Extensions treated as images (same set the later file/key comparison step uses).
# Anything else found in the folder is skipped, so a stray entry such as a
# sub-folder or a Thumbs.db file cannot crash load_img halfway through the run.
image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}

# List every entry in the directory, then keep only regular files with an image extension
all_entries = os.listdir(directory)
file_list = [
    entry
    for entry in all_entries
    if os.path.isfile(os.path.join(directory, entry))
    and os.path.splitext(entry)[1].lower() in image_extensions
]

# Report skipped entries explicitly instead of dropping them silently
skipped_count = len(all_entries) - len(file_list)
if skipped_count:
    print(f"Skipping {skipped_count} non-image entries in directory: {directory}")

# Print how many images were found before starting the loop
total_files = len(file_list)
print(f"Found {total_files} files in directory: {directory}")

# Loop through every image file name in the specified image directory
for idx, image_name in enumerate(file_list, start=1):
    # Build the full path to the current image file by joining directory and file name
    image_path = os.path.join(directory, image_name)

    # Load the image from disk and resize it to 600x600 pixels
    # (the input size shown by the model summary for EfficientNet-B7)
    img = load_img(image_path, target_size=(600, 600))

    # Convert the loaded PIL image to a NumPy array (height, width, channels)
    img = img_to_array(img)

    # Add a batch dimension so the array shape becomes (1, height, width, channels),
    # matching the (None, 600, 600, 3) input shape of the model
    img = img.reshape(1, *img.shape)

    # The rescaling/normalization is done by layers inside the model itself
    # (see the Rescaling and Normalization layers in the summary). Per the Keras
    # documentation, preprocess_input is a pass-through for EfficientNet; the
    # call is kept so the pipeline follows the usual Keras applications pattern.
    img = preprocess_input(img)

    # Pass the image through the feature extractor model to get the feature vector
    # verbose=0 disables progress bar output
    feature_vector = feature_extractor_model.predict(img, verbose=0)

    # Store the extracted feature vector in the dictionary using the image path as the key
    features[image_path] = feature_vector

    # Print progress information for this image
    print(f"Processed {idx}/{total_files}: {image_name}")


Found 6134 files in directory: front masked images


2025-11-24 14:44:27.239335: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


Processed 1/6134: 6ab1d061f51c6079633aeceed2faeb0b.png
Processed 2/6134: e94e2e05fb8b099955bbc4fa5ce81e22.png
Processed 3/6134: ba6951a4f37fc9302243370e927a02e2.png
Processed 4/6134: 947d16539d4702427aa74f737329ffb9.png
Processed 5/6134: 9326695bf62926ec22690f576a633bba.png
Processed 6/6134: 660ac3433f472e9bbaeed8b0e5e35254.png
Processed 7/6134: bfc13bbdf0bf989d003df2a30b8582cd.png
Processed 8/6134: 2993db3b9046e13aed7fa00b73c17ed9.png
Processed 9/6134: 942918cc56a7a197a486742587fe6b3d.png
Processed 10/6134: 9e7a4cc6611782335695d76dc4ffa2ad.png
Processed 11/6134: 32bf6b6289f7b9268ca5e15093687d76.png
Processed 12/6134: f5d290f1a7f39452b41c89bd0f40ee58.png
Processed 13/6134: c15896db70e5a9591bbc336d7972a487.png
Processed 14/6134: 45108038273e7fb409461e9a52b2d21a.png
Processed 15/6134: f2f5ad8df0d6d8d958db3608b7f38ee9.png
Processed 16/6134: 44d2ce61ba5f0cbc6476b13e7307758a.png
Processed 17/6134: 25da35d9a3e04ce905427bf81aee32a5.png
Processed 18/6134: cd754dd3dc074689bacbd098c3ef32ea.png
P

Step 7 - Save the dictionary as a pickle file

In [10]:
# Define the output filename for saving the features dictionary as a pickle file
features_pickle_path = 'efficientnet_b7_front_masked_features.pkl'

# Open the file in binary write mode ('wb') to prepare for writing the pickle data
with open(features_pickle_path, 'wb') as f:
    # Use pickle.dump to serialize (save) the 'features' dictionary into the file
    pickle.dump(features, f)

# Print a confirmation message to indicate that the features have been saved successfully
print(f"Features dictionary saved to: {features_pickle_path}")


Features dictionary saved to: efficientnet_b7_front_masked_features.pkl


In [11]:
import os  # Import os to work with file paths

# Re-key the features by bare file name instead of by path
features_renamed = {}

for full_path, feature_vector in features.items():
    # os.path.basename keeps only the last path component, i.e. the file name
    features_renamed[os.path.basename(full_path)] = feature_vector

# From here on 'features' refers to the dictionary keyed by file name
features = features_renamed

# Sanity check: show the first five keys of the re-keyed dictionary
print("Sample keys after rename:")
for sample_key in list(features)[:5]:
    print(sample_key)


Sample keys after rename:
6ab1d061f51c6079633aeceed2faeb0b.png
e94e2e05fb8b099955bbc4fa5ce81e22.png
ba6951a4f37fc9302243370e927a02e2.png
947d16539d4702427aa74f737329ffb9.png
9326695bf62926ec22690f576a633bba.png


In [12]:
# Same file name as the earlier save; the file is overwritten so that it holds
# the dictionary whose keys are file names only
pickle_path = 'efficientnet_b7_front_masked_features.pkl'

# Binary write mode truncates the existing file before the new pickle is written
with open(pickle_path, 'wb') as out_file:
    # Pickler(file).dump(obj) is the object-style form of pickle.dump(obj, file)
    pickle.Pickler(out_file).dump(features)

# Confirm how many entries were saved and where
print(f"Updated features dictionary with {len(features)} entries saved to: {pickle_path}")

Updated features dictionary with 6134 entries saved to: efficientnet_b7_front_masked_features.pkl


In [13]:
# Number of entries in the features dictionary (one per processed image)
len(features)

6134

In [14]:
# Shape of the feature vector left bound by the last loop iteration;
# the leading 1 is the batch dimension added before prediction
feature_vector.shape

(1, 2560)

Step 8 - Compare file names and dictionary keys

In [15]:
import os  # Import os to work with the filesystem (list files, join paths, etc.)

# ---------------------------------------------
# 1. Collect image file names from the directory
# ---------------------------------------------

# File extensions that are considered images
valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}

# Keep only regular files (not subfolders) whose extension, compared
# case-insensitively, is one of the valid image extensions
image_files_in_dir = [
    name
    for name in os.listdir(directory)
    if os.path.isfile(os.path.join(directory, name))
    and os.path.splitext(name)[1].lower() in valid_extensions
]

# Convert the file names and the dictionary keys to sets for comparison
files_set = set(image_files_in_dir)   # Set of file names in the folder
keys_set = set(features)              # Set of keys in the features dictionary

# ---------------------------------------------
# 2. Compare sets
# ---------------------------------------------

# Files that exist in the folder but NOT in the dictionary keys
files_not_in_dict = files_set - keys_set

# Keys that exist in the dictionary but NOT as files in the folder
keys_not_in_folder = keys_set - files_set

# ---------------------------------------------
# 3. Print summary
# ---------------------------------------------

# Upper bound on the examples listed per mismatch category, to avoid huge output
max_examples = 10

print(f"Number of image files in directory: {len(files_set)}")
print(f"Number of keys in features dictionary: {len(keys_set)}\n")

# The status glyphs are written as escapes (\u2705 = check mark, \u26a0 = warning
# sign) so they cannot be corrupted if this file is re-saved with the wrong codec.
if not files_not_in_dict and not keys_not_in_folder:
    print("\u2705 All image filenames and dictionary keys MATCH exactly.")
else:
    print("\u26a0 Some mismatches were found:\n")

    # Report files in folder but not in dictionary
    if files_not_in_dict:
        print(f"Files in directory but NOT in dictionary ({len(files_not_in_dict)}):")
        for name in sorted(files_not_in_dict)[:max_examples]:
            print("  -", name)
        print()

    # Report keys in dictionary but not in folder
    if keys_not_in_folder:
        print(f"Keys in dictionary but NO corresponding file in directory ({len(keys_not_in_folder)}):")
        for name in sorted(keys_not_in_folder)[:max_examples]:
            print("  -", name)


Number of image files in directory: 6134
Number of keys in features dictionary: 6134

✅ All image filenames and dictionary keys MATCH exactly.
