In [1]:
# ---------------------------------------------
# STEP 1: Import Required Libraries
# ---------------------------------------------
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split


In [7]:
# ---------------------------------------------
# STEP 2: Set Path to RAW Image Dataset Folder
# ---------------------------------------------
# NOTE: this is an absolute, machine-specific path; edit it to point at your
# local copy of the dataset before running the notebook.
dataset_path = "C:\\Users\\dell\\Downloads\\Cars Dataset\\train\\Rolls Royce"   # change if your folder name is different

# Get all image file names.
# - os.listdir() returns entries in arbitrary order, so the names are sorted
#   to make the sample order (and the later train/test split) reproducible.
# - Only regular files are kept, so sub-folders are not counted as images.
image_files = sorted(
    name
    for name in os.listdir(dataset_path)
    if os.path.isfile(os.path.join(dataset_path, name))
)
print("Total raw images found:", len(image_files))


Total raw images found: 311


In [8]:
# ---------------------------------------------
# STEP 3: Load RAW Images Using PIL
# ---------------------------------------------
# Every entry of image_files is opened and converted to RGB. Files that cannot
# be read are skipped (best effort) and reported together with the reason.
raw_images = []

for file in image_files:
    img_path = os.path.join(dataset_path, file)

    try:
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(img_path) as opened:
            img = opened.convert("RGB")  # Convert to RGB
        raw_images.append(img)
    except Exception as exc:
        # `Exception` (not a bare `except:`) so that Ctrl-C / kernel interrupt
        # is not swallowed while a large folder is being loaded.
        print("Skipped corrupted file:", file, f"({exc})")

print("Total images loaded successfully:", len(raw_images))


Total images loaded successfully: 311


In [9]:
# ---------------------------------------------
# STEP 4: Resize Images to Fixed Size
# ---------------------------------------------
# Target size passed to each image's resize(); edit this tuple to use a
# different size.
IMG_SIZE = (128, 128)

# Build a new list of resized images; raw_images itself is not reassigned.
resized_images = list(map(lambda picture: picture.resize(IMG_SIZE), raw_images))

print("All images resized to:", IMG_SIZE)


All images resized to: (128, 128)


In [10]:
# ---------------------------------------------
# STEP 5: Convert Images to NumPy Arrays & Reshape
# ---------------------------------------------
# Stack the per-image pixel arrays into one array.
X = np.array([np.array(img) for img in resized_images])

# Reshape → (num_samples, height, width, channels)
# The target shape is derived from IMG_SIZE instead of a hard-coded 128x128,
# so changing IMG_SIZE in STEP 4 does not break this cell. PIL sizes are
# (width, height) while the pixel array is (height, width, channels), hence
# the swapped indices. The 3 channels come from the RGB conversion in STEP 3.
X = X.reshape(len(resized_images), IMG_SIZE[1], IMG_SIZE[0], 3)

print("Final dataset shape:", X.shape)


Final dataset shape: (311, 128, 128, 3)


In [11]:
# ---------------------------------------------
# STEP 6: Create Dummy Labels
# ---------------------------------------------
# One placeholder label per sample; every label is 0 because only a single
# class folder is loaded.
y = np.zeros(shape=(len(X),))

print("Labels shape:", y.shape)


Labels shape: (311,)


In [12]:
# ---------------------------------------------
# STEP 7: Normalize Images (0–255 → 0–1)
# ---------------------------------------------
# Only scale while X still holds integer pixel values. Without this guard,
# re-running the cell would divide the already-normalized data by 255 again.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0

print("Normalization completed!")


Normalization completed!


In [13]:
# ---------------------------------------------
# STEP 8: Split Dataset into Train & Test Sets
# ---------------------------------------------
# 20% of the samples are held out for testing; the fixed random_state makes
# the shuffle repeatable between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

print("Training data:", X_train.shape)
print("Testing data:", X_test.shape)


Training data: (248, 128, 128, 3)
Testing data: (63, 128, 128, 3)


In [14]:
# ---------------------------------------------
# STEP 9: Save Preprocessed Dataset to Disk
# ---------------------------------------------
# Write the full (unsplit) image array and its labels as .npy files, using
# paths relative to the current working directory.
np.save(file="X_cars.npy", arr=X)
np.save(file="y_cars.npy", arr=y)

print("Saved X_cars.npy and y_cars.npy")


Saved X_cars.npy and y_cars.npy


In [15]:
# ---------------------------------------------
# STEP 10: Load Saved NumPy Files Again
# ---------------------------------------------
# Read back the two .npy files written in STEP 9 and report their shapes.
X_loaded = np.load(file="X_cars.npy")
y_loaded = np.load(file="y_cars.npy")

print("Reloaded dataset shapes:")
print("X_loaded:", X_loaded.shape)
print("y_loaded:", y_loaded.shape)


Reloaded dataset shapes:
X_loaded: (311, 128, 128, 3)
y_loaded: (311,)
