In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import cv2
import os
import numpy as np
import pandas as pd

dataset_dir = "../StanfordCarDataset"





Preprocessing/Resizing the images 

In [2]:
def normalize_image(image):
  image_array = np.array(image)
  normalized_array = image_array / 255.0
  return Image.fromarray((normalized_array * 255).astype(np.uint8))

def denoise_image(image):
  # Apply median filtering (adjust kernel size as needed)
  image = cv2.medianBlur(image, ksize=5)

  # Normalize the image
  return normalize_image(image)

def preprocess_image(image):
  return normalize_image(image)
  # no need for denoising now
  # denoise_image(image)

Preprocessing images by resizing and normalizing them.

Saving the new images in new train and test directories.

In [3]:
train_dataset_folder = f"{dataset_dir}/cars_train/cars_train"
train_output_folder = f"{dataset_dir}/resized_dataset_train"

test_dataset_folder = f"{dataset_dir}/cars_test/cars_test"
test_output_folder = f"{dataset_dir}/resized_dataset_test"

In [6]:
def resize_images(dataset_folder, output_folder, target_size, force_update=False):
    if os.path.isdir(output_folder) and len(os.listdir(output_folder)) != 0 and not force_update:
        return
    # Loop through all images in the dataset folder
    for filename in os.listdir(dataset_folder):
        # Skiping non-image files in case any
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(dataset_folder, filename)
        image = Image.open(image_path)

        resized_image = image.resize(target_size, Image.LANCZOS)

        if image.mode != 'RGB':
            resized_image = resized_image.convert('RGB')
            
        resized_image = preprocess_image(resized_image)

        # Saving the resized image
        output_path = os.path.join(output_folder, filename)
        resized_image.save(output_path)

os.makedirs(train_output_folder, exist_ok=True)
os.makedirs(test_output_folder, exist_ok=True)

resize_images(train_dataset_folder, train_output_folder, target_size=(128, 128), force_update=True)
resize_images(test_dataset_folder, test_output_folder, target_size=(128, 128), force_update=True)

In [5]:
def build_image_features_extraction_model():
    base_model = VGG16(weights='imagenet', include_top=True)
    # Retrieve the last dense layer name
    last_dense_layer_name = base_model.layers[-2].name
    # return Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)
    # Create a new model for feature extraction from the last dense layer
    return Model(inputs=base_model.input, outputs=base_model.get_layer(last_dense_layer_name).output)

In [7]:
def extract_image_features_as_list(feature_extractor, dir_path, max_iterations=-1):
    #train_output_folder
    features_list = []
    count = 0
    for filename in os.listdir(dir_path):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            # Load and preprocess the image
            img_path = os.path.join(dir_path, filename)
            img = Image.open(img_path)
            img_array = np.array(img)
            # add batch dimensions
            img_array = np.expand_dims(img_array, axis=0)
            img_array = img_array / 255.0

            # Extract features from the image
            features = feature_extractor.predict(img_array)

            # Append the features to the list
            features_list.append(features)

            count += 1
            if max_iterations > 0 and count >= max_iterations:
                break

    return features_list

In [8]:
def extract_images_features_into_csv(feature_extractor, image_dir, csv_file, max_files):
    # Iterate over the images in the directory
    features_list = extract_image_features_as_list(feature_extractor, image_dir, max_iterations=max_files)

    # features_array = np.vstack(features_list)
    features_list_flattened = [features.reshape(features.shape[0], -1) for features in features_list]

    # Convert the numpy array to a pandas DataFrame
    features_df = pd.DataFrame(np.concatenate(features_list_flattened, axis=0))

    # Save the DataFrame to a CSV file
    features_df.to_csv(csv_file, index=False)
    
    return features_df

In [10]:

# building the model
feature_extractor = build_image_features_extraction_model()
# extracting features from the train data and saving them to a csv file
features_df = extract_images_features_into_csv(feature_extractor, train_output_folder, 'extracted_features.csv', 10)
features_df.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,0.0,0.0,1.490126,0.405198,0.855917,0.237567,0.0,0.0,0.778185,0.0,...,0.0,1.067557,1.017491,0.103542,0.397002,0.0,0.280751,0.0,0.0,0.779927
1,0.0,0.0,1.449023,0.272045,0.884024,0.290615,0.0,0.0,0.730201,0.0,...,0.0,1.24632,0.788255,0.192335,0.141271,0.061687,0.466353,0.0,0.0,0.843939
2,0.0,0.0,1.571351,0.135814,0.808737,0.348193,0.0,0.0,0.52038,0.0,...,0.0,1.324136,0.898207,0.257716,0.534768,0.0,0.599359,0.0,0.0,0.77528
3,0.0,0.0,1.582169,0.0,0.814979,0.345997,0.0,0.0,0.590115,0.0,...,0.0,1.507932,0.746466,0.587758,0.589106,0.0,0.80384,0.0,0.0,1.180781
4,0.0,0.0,1.564512,0.329268,0.733793,0.297003,0.0,0.0,0.459763,0.0,...,0.0,1.384885,1.095157,0.195356,0.498631,0.0,0.336337,0.0,0.0,0.768633


In [11]:
test_features_df = extract_images_features_into_csv(feature_extractor, train_output_folder, 'extracted_test_features.csv', 10)
test_features_df.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,0.0,0.0,1.490126,0.405198,0.855917,0.237567,0.0,0.0,0.778185,0.0,...,0.0,1.067557,1.017491,0.103542,0.397002,0.0,0.280751,0.0,0.0,0.779927
1,0.0,0.0,1.449023,0.272045,0.884024,0.290615,0.0,0.0,0.730201,0.0,...,0.0,1.24632,0.788255,0.192335,0.141271,0.061687,0.466353,0.0,0.0,0.843939
2,0.0,0.0,1.571351,0.135814,0.808737,0.348193,0.0,0.0,0.52038,0.0,...,0.0,1.324136,0.898207,0.257716,0.534768,0.0,0.599359,0.0,0.0,0.77528
3,0.0,0.0,1.582169,0.0,0.814979,0.345997,0.0,0.0,0.590115,0.0,...,0.0,1.507932,0.746466,0.587758,0.589106,0.0,0.80384,0.0,0.0,1.180781
4,0.0,0.0,1.564512,0.329268,0.733793,0.297003,0.0,0.0,0.459763,0.0,...,0.0,1.384885,1.095157,0.195356,0.498631,0.0,0.336337,0.0,0.0,0.768633
