In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import cv2
import os
import numpy as np
import pandas as pd

dataset_dir = "../StanfordCarDataset"


Preprocessing/Resizing the images 

In [12]:
def normalize_image(image):
  image_array = np.array(image)
  normalized_array = image_array / 255.0
  return Image.fromarray((normalized_array * 255).astype(np.uint8))

def denoise_image(image):
  # Apply median filtering (adjust kernel size as needed)
  image = cv2.medianBlur(image, ksize=5)

  # Normalize the image
  return normalize_image(image)

def preprocess_image(image):
  return normalize_image(image)
  # no need for denoising now
  # denoise_image(image)

Preprocessing images by resizing and normalizing them.

Saving the new images in new train and test directories.

In [13]:
train_dataset_folder = f"{dataset_dir}/cars_train/cars_train"
train_output_folder = f"{dataset_dir}/resized_dataset_train"

test_dataset_folder = f"{dataset_dir}/cars_test/cars_test"
test_output_folder = f"{dataset_dir}/resized_dataset_test"

In [15]:
def resize_images(dataset_folder, output_folder, target_size, force_update=False):
    if os.path.isdir(output_folder) and len(os.listdir(output_folder)) != 0 and not force_update:
        return
    # Loop through all images in the dataset folder
    for filename in os.listdir(dataset_folder):
        # Skiping non-image files in case any
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(dataset_folder, filename)
        image = Image.open(image_path)

        resized_image = image.resize(target_size, Image.LANCZOS)

        if image.mode != 'RGB':
            resized_image = resized_image.convert('RGB')
            
        resized_image = preprocess_image(resized_image)

        # Saving the resized image
        output_path = os.path.join(output_folder, filename)
        resized_image.save(output_path)

os.makedirs(train_output_folder, exist_ok=True)
os.makedirs(test_output_folder, exist_ok=True)

resize_images(train_dataset_folder, train_output_folder, target_size=(224, 224))
resize_images(test_dataset_folder, test_output_folder, target_size=(224, 224))

In [16]:
def build_image_features_extraction_model():
    base_model = VGG16(weights='imagenet', include_top=True)
    # Retrieve the last dense layer name
    last_dense_layer_name = base_model.layers[-2].name
    # return Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)
    # Create a new model for feature extraction from the last dense layer
    return Model(inputs=base_model.input, outputs=base_model.get_layer(last_dense_layer_name).output)

In [17]:
def extract_image_features_as_list(feature_extractor, dir_path, max_iterations=-1):
    #train_output_folder
    features_list = []
    count = 0
    for filename in os.listdir(dir_path):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            # Load and preprocess the image
            img_path = os.path.join(dir_path, filename)
            img = Image.open(img_path)
            img_array = np.array(img)
            # add batch dimensions
            img_array = np.expand_dims(img_array, axis=0)
            img_array = img_array / 255.0

            # Extract features from the image
            features = feature_extractor.predict(img_array)

            # Append the features to the list
            features_list.append(features)

            count += 1
            if max_iterations > 0 and count >= max_iterations:
                break

    return features_list

In [18]:

feature_extractor = build_image_features_extraction_model()

# Iterate over the images in the directory
features_list = extract_image_features_as_list(feature_extractor, train_output_folder, max_iterations=10)

# features_array = np.vstack(features_list)
features_list_flattened = [features.reshape(features.shape[0], -1) for features in features_list]

# Convert the numpy array to a pandas DataFrame
features_df = pd.DataFrame(np.concatenate(features_list_flattened, axis=0))

# Save the DataFrame to a CSV file
features_df.to_csv('extracted_features.csv', index=False)

ValueError: in user code:

    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 2440, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 2425, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 2413, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\training.py", line 2381, in predict_step
        return self(x, training=False)
    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\toufic.f\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_7" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, 400, 600, 3)
