1. load EfficientNetB7 model
2. restructure the model by removing final classification layer
3. set feature storage and image directory
4. loop through every file name in the image directory
5. resize the image to 600*600 pixels because of the requirenment of the model
6. preprocess the image according to the requirenments of the model
7. apply the image to the restructured mmodel
8. stored the extracted features in a dictionary

Step 1 â€“ Imports

In [1]:
# Import the 'os' module to interact with the operating system (e.g., list files in a folder)
import os

# Import the EfficientNetB7 model and its preprocessing function from Keras applications
from tensorflow.keras.applications.efficientnet import EfficientNetB7, preprocess_input

# Import utility functions to load an image file and convert it to a NumPy array
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Import the Model class to create a custom model that outputs features instead of class predictions
from tensorflow.keras.models import Model

# Import the 'pickle' module to save and load Python objects (like dictionaries) to/from a file
import pickle


2025-11-25 11:33:48.152430: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-25 11:33:57.942750: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-25 11:34:03.997309: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-25 11:34:04.081560: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-25 11:34:11.987666: I tensorflow/core/platform/cpu_feature_gua

Step 2 - Load the EfficientNet-B7 model

In [2]:
# Load the EfficientNet-B7 model with pre-trained ImageNet weights
# By default, include_top=True, so the final classification layers are included
base_model = EfficientNetB7(weights='imagenet')

2025-11-25 11:34:33.266493: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-11-25 11:34:39.546330: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-11-25 11:34:39.549573: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7.h5


Step 3 - Restructure the model

In [3]:
# Create a new model that has the same input as the original EfficientNet-B7 model
# but uses the second-to-last layer's output (before the final classification layer) as the output
feature_extractor_model = Model(
    inputs=base_model.inputs,            # Use the same input tensor as EfficientNet-B7
    outputs=base_model.layers[-2].output # Use the layer just before the final prediction layer
)

Step 4 - Show model summary

In [4]:
# Print a summary of the modified model architecture to verify the output shapes and layers
print(feature_extractor_model.summary())


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 600, 600, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 600, 600, 3)          0         ['input_1[0][0]']             
                                                                                                  
 normalization (Normalizati  (None, 600, 600, 3)          7         ['rescaling[0][0]']           
 on)                                                                                              
                                                                                                  
 tf.math.multiply (TFOpLamb  (None, 600, 600, 3)          0         ['normalization[0][0]']   

Step 5 - Prepare feature storage and image directory

In [5]:
# Create an empty dictionary to store extracted features for each image
# The keys will be image paths and the values will be the corresponding feature vectors
features = {}

# Define the directory that contains the input images
# This is the full Windows path to your "front masked images" training data folder
directory = r'test - front masked images'


Step 6 - Loop through images, preprocess, and extract features

In [6]:
# Get a list of all files in the directory
file_list = os.listdir(directory)

# Print how many images were found before starting the loop
print(f"Found {len(file_list)} files in directory: {directory}")

# Loop through every file name in the specified image directory
for idx, image_name in enumerate(file_list, start=1):
    # Build the full path to the current image file by joining directory and file name
    image_path = os.path.join(directory, image_name)
    
    # Load the image from disk and resize it to 600x600 pixels (required size for EfficientNet-B7)
    img = load_img(image_path, target_size=(600, 600))
    
    # Convert the loaded PIL image to a NumPy array (height, width, channels)
    img = img_to_array(img)
    
    # Add a batch dimension so the array shape becomes (1, height, width, channels)
    # This matches the expected input shape for the model
    img = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
    
    # Preprocess the image using EfficientNet's preprocessing (scaling, normalization, etc.)
    img = preprocess_input(img)
    
    # Pass the preprocessed image through the feature extractor model to get the feature vector
    # verbose=0 disables progress bar output
    feature_vector = feature_extractor_model.predict(img, verbose=0)
    
    # Store the extracted feature vector in the dictionary using the image path as the key
    features[image_path] = feature_vector

    # Print progress information for this image
    print(f"Processed {idx}/{len(file_list)}: {image_name}")


Found 1685 files in directory: test - front masked images


2025-11-25 11:35:57.435013: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


Processed 1/1685: e5ae8fe5bbdf611a1e8d06e66e849bdf.png
Processed 2/1685: 605a5fd09058c48156b0ef518b63b2de.png
Processed 3/1685: 909c9277309e13ee014e347603aba620.png
Processed 4/1685: bef6a68bc8dd475c124f6de2413385d3.png
Processed 5/1685: 6d7ed4bc4a17546447efed0ca6e2ff11.png
Processed 6/1685: 4c12d6a82cb0a75ee556a54ab1afc21e.png
Processed 7/1685: dd27be8b3d6b9c2c14c14318612ba0dc.png
Processed 8/1685: fc65b84d8183e3a872785b4e2eecaa66.png
Processed 9/1685: 851f712c7cfc6b62b20b6f8cba65c20a.png
Processed 10/1685: c9ede0a19f8e79ec7a4cd8f126129f2d.png
Processed 11/1685: 652500aa90597ed06ccc8f15bc9b83aa.png
Processed 12/1685: b55e0664c7c1642cd015b5585a8d5fd3.png
Processed 13/1685: 67194d8a47331d1f722db4f737546021.png
Processed 14/1685: 719fc866000f5edb56700ee0755ea109.png
Processed 15/1685: 46c5d36f10af6cf0fd2b4a02eb9a9add.png
Processed 16/1685: e1a3c3b6df492a58fde15be36a8371bc.png
Processed 17/1685: 7ac05db49a083076cb5ed077f16b90e0.png
Processed 18/1685: 7e1e1d9f8724124bcae0a945c3c324cc.png
P

IsADirectoryError: [Errno 21] Is a directory: 'test - front masked images/.ipynb_checkpoints'

Step 7 - Save the dictionary as a pickle file

In [None]:
# Define the output filename for saving the features dictionary as a pickle file
features_pickle_path = 'efficientnet_b7_front_masked_features - testA.pkl'

# Open the file in binary write mode ('wb') to prepare for writing the pickle data
with open(features_pickle_path, 'wb') as f:
    # Use pickle.dump to serialize (save) the 'features' dictionary into the file
    pickle.dump(features, f)

# Print a confirmation message to indicate that the features have been saved successfully
print(f"Features dictionary saved to: {features_pickle_path}")


In [None]:
import os  # Import os to work with file paths

# Create a new dictionary that will store features with only file names as keys
features_renamed = {}

# Loop over the existing dictionary items
for full_path, feature_vector in features.items():
    # Extract only the file name (e.g., "001f3a8850....png") from the full path
    filename = os.path.basename(full_path)
    
    # Store the feature vector in the new dictionary using the file name as the key
    features_renamed[filename] = feature_vector

# Replace the old dictionary with the new one (keys now are file names only)
features = features_renamed

# Optional: print a few keys to verify the change
print("Sample keys after rename:")
for i, k in enumerate(features.keys()):
    print(k)
    if i == 4:  # stop after 5 keys
        break


In [None]:
# Define the pickle file name (same as the original file you used before)
pickle_path = 'efficientnet_b7_front_masked_features.pkl'

# Open the pickle file in binary write mode to overwrite it with the updated dictionary
with open(pickle_path, 'wb') as f:
    # Save (serialize) the updated 'features' dictionary into the pickle file
    pickle.dump(features, f)

# Print a confirmation message showing how many entries were saved and where
print(f"Updated features dictionary with {len(features)} entries saved to: {pickle_path}")

In [None]:
len(features)

In [None]:
feature_vector.shape

Step 8 - Comapre file names and dictionary keys

In [None]:
import os  # Import os to work with the filesystem (list files, join paths, etc.)

# ---------------------------------------------
# 1. Collect image file names from the directory
# ---------------------------------------------

# Define which file extensions you consider as images
valid_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}

# Create an empty list to store image file names found in the folder
image_files_in_dir = []

# Loop through every entry in the directory
for name in os.listdir(directory):
    # Build the full path to this entry
    full_path = os.path.join(directory, name)
    
    # Check that this is a file (not a subfolder) and has a valid image extension
    if os.path.isfile(full_path) and os.path.splitext(name)[1].lower() in valid_extensions:
        # If so, add just the file name (e.g., "001f3a8....png") to our list
        image_files_in_dir.append(name)

# Convert the list of image files and dictionary keys to sets for comparison
files_set = set(image_files_in_dir)   # Set of file names in the folder
keys_set = set(features.keys())       # Set of keys in the features dictionary

# ---------------------------------------------
# 2. Compare sets
# ---------------------------------------------

# Files that exist in the folder but NOT in the dictionary keys
files_not_in_dict = files_set - keys_set

# Keys that exist in the dictionary but NOT as files in the folder
keys_not_in_folder = keys_set - files_set

# ---------------------------------------------
# 3. Print summary
# ---------------------------------------------

print(f"Number of image files in directory: {len(files_set)}")
print(f"Number of keys in features dictionary: {len(keys_set)}\n")

# Check if everything matches perfectly
if not files_not_in_dict and not keys_not_in_folder:
    print("âœ… All image filenames and dictionary keys MATCH exactly.")
else:
    print("âš  Some mismatches were found:\n")
    
    # Report files in folder but not in dictionary
    if files_not_in_dict:
        print(f"Files in directory but NOT in dictionary ({len(files_not_in_dict)}):")
        # Print just a few examples to avoid huge output
        for i, name in enumerate(sorted(files_not_in_dict)):
            print("  -", name)
            if i == 9:  # stop after 10 items
                break
        print()
    
    # Report keys in dictionary but not in folder
    if keys_not_in_folder:
        print(f"Keys in dictionary but NO corresponding file in directory ({len(keys_not_in_folder)}):")
        for i, name in enumerate(sorted(keys_not_in_folder)):
            print("  -", name)
            if i == 9:  # stop after 10 items
                break
