In [1]:
# Training the actual model

In [2]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow import keras
from skimage.feature import hog

import cv2


2025-03-18 08:59:28.947699: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742302768.965466  808873 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742302768.970962  808873 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-18 08:59:28.988634: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# paths for images
train_path = 'stanford-cars-dataset/cars_train/cars_train/'
test_path = 'stanford-cars-dataset/cars_test/cars_test/'

In [22]:
# Import the labels into a dataframe
path_labels = 'stanford-cars-dataset/stanford_cars_with_class_names.xlsx'

if os.path.exists(path_labels):
    df_labels = pd.read_excel(path_labels)

    df_labels = df_labels.drop('Unnamed: 0', axis=1)
    df_labels = df_labels.rename(columns={'ture_class_name': 'true_class_name'})
df_labels

Unnamed: 0,x1,y1,x2,y2,class,true_class_name,image
0,39,116,569,375,14,Audi TTS Coupe 2012,00001.jpg
1,36,116,868,587,3,Acura TL Sedan 2012,00002.jpg
2,85,109,601,381,91,Dodge Dakota Club Cab 2007,00003.jpg
3,621,393,1484,1096,134,Hyundai Sonata Hybrid Sedan 2012,00004.jpg
4,14,36,133,99,106,Ford F-450 Super Duty Crew Cab 2012,00005.jpg
...,...,...,...,...,...,...,...
8139,3,44,423,336,78,Chrysler Town and Country Minivan 2012,08140.jpg
8140,138,150,706,523,196,smart fortwo Convertible 2012,08141.jpg
8141,26,246,660,449,163,Mercedes-Benz SL-Class Coupe 2009,08142.jpg
8142,78,526,1489,908,112,Ford GT Coupe 2006,08143.jpg


In [5]:
y = df_labels['class']
image_filenames = df_labels['image']

In [6]:
# Add the path to the image
image_filenames = train_path + image_filenames

In [7]:
# Get an actual image objects
target_size = (320, 240)  # TODO: set target_size how we want it to be.
images = []
i = 0
for filename in image_filenames:
    # Read the image
    img = cv2.imread(filename)
    # Convert to RGB (OpenCV reads images in BGR format)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # TODO: apply the below tomorrow
    img = cv2.resize(img, target_size)  # Resize all images to the same dimensions
    # add to list
    images.append(img)

    # making sure all images get processed
    if i % 1000 == 0:
        print(i)
    i += 1

0
1000
2000
3000
4000
5000
6000
7000
8000


In [8]:
# Extracts HOG (Histogram of Oriented Gradients) features form a list of images
def extract_hog_features(images):
    hog_features = []

    i = 0
    for image in images:
        fd = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), channel_axis=-1)
        # print(f"Image {i} shape: {image.shape}, HOG feature length: {len(fd)}")
        hog_features.append(fd)

        # making sure all images get processed
        if i % 500 == 0:
            print(i)
        i += 1
    return np.array(hog_features)

In [9]:
# Get features
features = extract_hog_features(images)

0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000


In [10]:
features

array([[0.29228798, 0.3008004 , 0.2193414 , ..., 0.        , 0.        ,
        0.        ],
       [0.03840699, 0.04439234, 0.02844903, ..., 0.1134953 , 0.19899085,
        0.11331112],
       [0.27152548, 0.03324036, 0.04905377, ..., 0.05895387, 0.19980109,
        0.06175043],
       ...,
       [0.23224188, 0.11655216, 0.06693488, ..., 0.032883  , 0.00927248,
        0.00809374],
       [0.23231466, 0.06109139, 0.16978195, ..., 0.08451966, 0.0970991 ,
        0.31364249],
       [0.14970172, 0.09845389, 0.        , ..., 0.04628303, 0.09684546,
        0.16841381]])

In [11]:
# Normalize features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(normalized_features, y, test_size=0.2, random_state=1)

In [13]:
# Train a simple SVC
def train_svc(X_train, y_train):
    # Create an SVC model
    svc = SVC(kernel='rbf', probability=True)
    
    # Train the model
    svc.fit(X_train, y_train)
    
    return svc

In [14]:
# Train a neural network to run on the data
def train_nn(X_train, y_train, input_dim=40716):
    num_classes = len(np.unique(y_train))
    model = Sequential([
        # Input layer
        Dense(input_dim, activation='relu'),
        BatchNormalization(),
        Dropout(0.05),
        
        # Hidden layers
        Dense(16384, activation='relu'),
        BatchNormalization(),
        Dropout(0.05),
        
        Dense(8192, activation='relu'),
        BatchNormalization(),
        Dropout(0.05),

        Dense(8192, activation='relu'),
        BatchNormalization(),
        Dropout(0.05),

        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(0.05),

        Dense(2048, activation='relu'),
        BatchNormalization(),
        Dropout(0.1),

        Dense(1024, activation='relu'),
        BatchNormalization(),
        Dropout(0.1),
        
        # Output layer
        Dense(num_classes, activation='softmax')
    ])
    
    # Compile the model
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [15]:
def evaluate_model(model, X_test, y_test):
    # Make predictions
    y_pred_proba = model.predict(X_test)
    
    # Convert probabilities to class labels
    if len(y_pred_proba.shape) > 1 and y_pred_proba.shape[1] > 1:
        # For multi-class classification
        y_pred = np.argmax(y_pred_proba, axis=1)
        
        # If y_test is one-hot encoded, convert it back to class indices
        if len(y_test.shape) > 1 and y_test.shape[1] > 1:
            y_test_classes = np.argmax(y_test, axis=1)
        else:
            y_test_classes = y_test
    else:
        # For binary classification
        y_pred = (y_pred_proba > 0.5).astype(int).flatten()
        y_test_classes = y_test
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test_classes, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    
    # Detailed classification report
    print("\nClassification Report:")
    print(classification_report(y_test_classes, y_pred))
    
    return y_pred

In [16]:
nn_model = train_nn(X_train, y_train, input_dim=40716)
evaluate_model(nn_model, X_test, y_test)

2025-03-18 09:05:08.066108: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 428ms/step
Accuracy: 0.0049

Classification Report:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        13
           2       0.00      0.00      0.00         9
           3       0.00      0.00      0.00         9
           4       0.00      0.00      0.00         9
           5       0.00      0.00      0.00         9
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00        12
           8       0.00      0.00      0.00        14
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        12
          11       0.00      0.00      0.00         9
          12       0.00      0.00      0.00         9
          13       0.00      0.00      0.00        10
          14       0.00      0.00      0.00         4
          15       0.00      0.00      0.00         8
          16      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([138,  48,  48, ...,  48,  48,  48])

In [17]:
evaluate_model(nn_model, X_train, y_train)

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 426ms/step
Accuracy: 0.0048

Classification Report:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        32
           2       0.00      0.00      0.00        23
           3       0.00      0.00      0.00        34
           4       0.00      0.00      0.00        33
           5       0.00      0.00      0.00        32
           6       0.00      0.00      0.00        39
           7       0.00      0.00      0.00        27
           8       0.00      0.00      0.00        31
           9       0.00      0.00      0.00        36
          10       0.00      0.00      0.00        21
          11       0.00      0.00      0.00        29
          12       0.00      0.00      0.00        28
          13       0.00      0.00      0.00        31
          14       0.00      0.00      0.00        39
          15       0.00      0.00      0.00        35
          16    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([189,  48,  48, ...,  48,  48,  59])

In [18]:
X_train

array([[-1.30690867, -0.8215202 , -0.93583995, ..., -1.19493429,
        -1.11189386, -0.95318954],
       [-1.30690867, -0.8215202 , -0.93583995, ...,  0.93108574,
         0.20452542,  0.16514827],
       [-0.29954196, -0.2825284 , -0.67266417, ..., -1.19493429,
        -0.78883631, -0.25608938],
       ...,
       [-0.87778408,  0.3090356 ,  0.0641984 , ..., -1.07885411,
        -1.07761573, -0.95318954],
       [-0.88957855, -0.8215202 , -0.76363142, ...,  1.40138627,
         1.79747316, -0.20899359],
       [-0.26135943, -0.6192863 , -0.62833609, ..., -0.3826643 ,
         0.88243287, -0.78622903]])

In [19]:
X_train.shape

(6515, 40716)