# **Imports**
---

In [None]:
# Common
import os
import numpy as np
import tensorflow as tf

# Data
import imgaug.augmenters as iaa
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score

# Model
from sklearn.svm import SVC
from keras.models import Sequential, clone_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Visualization
import matplotlib.pyplot as plt

# **Data Read**
---

In [None]:
# Function to load images from directories
def load_images_from_folder(folder, flatten):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=(64,64), color_mode='grayscale')
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        # Flatten image into 1D array
        if flatten:
            images.append(img_array.flatten())
        else:
            images.append(img_array)
        labels.append(folder)
    return np.array(images), labels

# Directory paths for bikes and cars images
bikes_folder = 'Car-Bike-Dataset/Bike'
cars_folder = 'Car-Bike-Dataset/Car'

# Read every image from disk exactly once, in image (non-flattened) layout.
bike_images, bike_labels = load_images_from_folder(bikes_folder, False)
car_images, car_labels = load_images_from_folder(cars_folder, False)

all_images = np.vstack((bike_images, car_images))
all_labels = np.array(bike_labels + car_labels)

# Derive the flattened variants from the arrays already in memory instead of
# reading the folders a second time. Reshaping each image to one row gives the
# same values as flattening it at load time, and guarantees that the flat and
# image arrays hold the same samples in the same order.
bike_images_flat = bike_images.reshape(len(bike_images), -1)
car_images_flat = car_images.reshape(len(car_images), -1)
bike_labels_flat = list(bike_labels)
car_labels_flat = list(car_labels)

all_images_flat = np.vstack((bike_images_flat, car_images_flat))
all_labels_flat = np.array(bike_labels_flat + car_labels_flat)

In [None]:
# Perform train-test split
# Both calls use the same random_state on inputs of equal length, so the image
# and flattened arrays are partitioned into the same samples.
X_train, X_test, y_train, y_test = train_test_split(all_images, all_labels, test_size=0.2, random_state=44)
X_train_flat, X_test_flat, y_train_flat, y_test_flat = train_test_split(all_images_flat, all_labels_flat, test_size=0.2, random_state=44)

# Perform validation split: the first 800 samples of the training portion
# become the validation set, the remainder stays as training data.
X_val, X_train = np.split(X_train, [800])
y_val, y_train = np.split(y_train, [800])

X_val_flat, X_train_flat = np.split(X_train_flat, [800])
y_val_flat, y_train_flat = np.split(y_train_flat, [800])

# Make the labels numeric. The encoder is fitted once on the complete label
# set and then only applied to each split, so every split is guaranteed to use
# the same class -> integer mapping (re-fitting per split would produce a
# different mapping whenever a split happened to miss a class).
# Note: the y_*_flat arrays are left as the raw string labels.
lb = LabelEncoder()
lb.fit(all_labels)
y_train = lb.transform(y_train)
y_val = lb.transform(y_val)
y_test = lb.transform(y_test)

Image Augmentation
---

In [None]:
# Function for image augmentation using imgaug
def apply_image_augmentation(images, labels):
    """Run ``images`` through a random imgaug pipeline.

    The pipeline applies, in random order: a horizontal flip (probability
    0.5), an optional Gaussian blur (probability 0.5, sigma in [0, 0.5]) and
    optional additive Gaussian noise (probability 0.5, scale in
    [0, 0.05 * 255]).

    Args:
        images: Batch of images handed to the imgaug pipeline.
        labels: Labels belonging to ``images``.

    Returns:
        A tuple ``(augmented_images, labels)``; the labels are returned
        unchanged (the same object that was passed in).
    """
    horizontal_flip = iaa.Fliplr(0.5)
    # iaa.Crop(percent=(0, 0.1)),  # random crops (disabled)
    maybe_blur = iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5)))
    maybe_noise = iaa.Sometimes(0.5, iaa.AdditiveGaussianNoise(scale=(0, 0.05*255)))

    pipeline = iaa.Sequential(
        [horizontal_flip, maybe_blur, maybe_noise],
        random_order=True,
    )

    return pipeline(images=images), labels

# Apply image augmentation to training data
# The augmented copies are stacked on top of the original samples, so the
# augmented training set is twice as large as the plain one.
X_train_aug, y_train_aug = apply_image_augmentation(X_train, y_train)
X_train_aug = np.vstack((X_train_aug, X_train))
y_train_aug = np.hstack((y_train_aug, y_train))

# Build the flattened augmented set from the augmented *images* rather than
# augmenting the flattened matrix directly: imgaug interprets a 2-D array as a
# single (height, width) image, so flips and blur applied to the
# (n_samples, n_pixels) matrix would operate across samples instead of within
# each picture. X_train and X_train_flat hold the same samples in the same
# order, so flattening X_train_aug yields the matching flat representation.
X_train_aug_flat = X_train_aug.reshape(len(X_train_aug), -1)
y_train_aug_flat = y_train_aug

# **Parameter Optimization**
---

Optimization For The kNN Model
---

Parameter Optimization

In [None]:
# Candidate neighbour counts: odd values from 5 up to 37.
k_values = range(5, 39, 2)
knn_accuracies = []
knn_aug_accuracies = []
for k_value in k_values:
    # For each k, fit once on the plain and once on the augmented training
    # set; both are scored on the same validation set.
    for X_fit, y_fit, score_log in (
        (X_train_flat, y_train, knn_accuracies),              # Without Augmentation
        (X_train_aug_flat, y_train_aug, knn_aug_accuracies),  # With Augmentation
    ):
        knn = KNeighborsClassifier(n_neighbors=k_value)
        knn.fit(X_fit, y_fit)
        accuracy = knn.score(X_val_flat, y_val)
        score_log.append(accuracy)

# Best Parameters: the k with the highest validation accuracy per variant.
knn_best_param = k_values[np.argmax(knn_accuracies)]
knn_aug_best_param = k_values[np.argmax(knn_aug_accuracies)]

Visualization of the Accuracies

In [None]:
# Validation accuracy per neighbour count, one line per training-set variant.
plt.figure(figsize=(10, 6))
for series, series_label in (
    (knn_aug_accuracies, "Augmented"),
    (knn_accuracies, "Non-Augmented"),
):
    plt.plot(k_values, series, marker="o", label=series_label)
plt.xlabel("Number of Neighbors")
plt.ylabel("Accuracy")
plt.title("Number of Neighbors vs Accuracy (kNN)")
plt.xticks(k_values)
plt.grid(True)
plt.legend()
plt.show()

Evaluation Metrics

In [None]:
# knn_best_param and knn_aug_best_param are taken from the parameter
# optimization cell above. The hard-coded debugging overrides that used to be
# assigned here were removed so the metrics below describe the selected k.

def print_metrics_for_k(k_value, X_train, y_train, X_val, y_val):
    """Fit a kNN classifier and print its validation metrics.

    Args:
        k_value: Number of neighbours for the classifier.
        X_train, y_train: Data the classifier is fitted on.
        X_val, y_val: Data the fitted classifier is evaluated on.

    Prints the confusion matrix followed by macro-averaged recall, precision
    and F1 score. Returns nothing.
    """
    classifier = KNeighborsClassifier(n_neighbors=k_value)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_val)

    # Confusion Matrix
    cm = confusion_matrix(y_val, predictions)
    print(f"Confusion Matrix for k={k_value}:\n{cm}\n")

    # Recall, Precision, and F1 Score, all macro-averaged over the classes
    recall, precision, f1 = (
        scorer(y_val, predictions, average='macro')
        for scorer in (recall_score, precision_score, f1_score)
    )

    print(f"Metrics for k={k_value}: Recall: {recall}, Precision: {precision}, F1 Score: {f1}\n")

# Print metrics for the best non-augmented model, then for the best augmented
# model; both are evaluated on the same validation set.
for heading, best_k, X_fit, y_fit in (
    ("Metrics for Non-Augmented Model", knn_best_param, X_train_flat, y_train),
    ("Metrics for Augmented Model", knn_aug_best_param, X_train_aug_flat, y_train_aug),
):
    print(heading)
    print_metrics_for_k(best_k, X_fit, y_fit, X_val_flat, y_val)

---
Optimization For The SVM Model
---

Parameter Optimization

In [None]:
# Kernels compared for the SVM.
kernel_types = ["linear", "poly", "rbf", "sigmoid"]
svm_accuracies = []
svm_aug_accuracies = []
for kernel_type in kernel_types:
    # Without Augmentation
    svm = SVC(kernel=kernel_type).fit(X_train_flat, y_train)
    accuracy = accuracy_score(y_val, svm.predict(X_val_flat))
    svm_accuracies.append(accuracy)

    # With Augmentation
    svm_aug = SVC(kernel=kernel_type).fit(X_train_aug_flat, y_train_aug)
    accuracy_aug = accuracy_score(y_val, svm_aug.predict(X_val_flat))
    svm_aug_accuracies.append(accuracy_aug)

# Best Parameters: the kernel with the highest validation accuracy per variant.
svm_best_param = kernel_types[np.argmax(svm_accuracies)]
svm_aug_best_param = kernel_types[np.argmax(svm_aug_accuracies)]

Visualization of the Accuracies

In [None]:
# Validation accuracy per kernel, one line per training-set variant.
plt.figure(figsize=(10, 6))
for series, series_label in (
    (svm_aug_accuracies, "Augmented"),
    (svm_accuracies, "Non-Augmented"),
):
    plt.plot(kernel_types, series, marker="o", label=series_label)
plt.xlabel("Kernel Types")
plt.ylabel("Accuracy")
plt.title("Kernel Type vs Accuracy (SVM)")
plt.xticks(kernel_types)
plt.grid(True)
plt.legend()
plt.show()

Evaluation Metrics

---
Optimization For The Random Forest Model
---

Parameter Optimization

In [None]:
# Forest sizes compared for the Random Forest.
n_estimators = [10, 20, 50, 100, 150]
rf_accuracies = []
rf_aug_accuracies = []
for n_estimator in n_estimators:
    # For each forest size, fit once on the plain and once on the augmented
    # training set; both are scored on the same validation set.
    for X_fit, y_fit, score_log in (
        (X_train_flat, y_train, rf_accuracies),              # Without Augmentation
        (X_train_aug_flat, y_train_aug, rf_aug_accuracies),  # With Augmentation
    ):
        rf = RandomForestClassifier(random_state=42, n_estimators=n_estimator)
        rf.fit(X_fit, y_fit)
        y_pred = rf.predict(X_val_flat)
        accuracy = accuracy_score(y_val, y_pred)
        score_log.append(accuracy)

# Best Parameters: the forest size with the highest validation accuracy.
rf_best_param = n_estimators[np.argmax(rf_accuracies)]
rf_aug_best_param = n_estimators[np.argmax(rf_aug_accuracies)]

Visualization of the Accuracies

In [None]:
# Validation accuracy per forest size, one line per training-set variant.
plt.figure(figsize=(10, 6))
for series, series_label in (
    (rf_aug_accuracies, "Augmented"),
    (rf_accuracies, "Non-Augmented"),
):
    plt.plot(n_estimators, series, marker="o", label=series_label)
plt.xlabel("n_estimators")
plt.ylabel("Accuracy")
plt.title("n_estimators vs Accuracy (Random Forest)")
plt.xticks(n_estimators)
plt.grid(True)
plt.legend()
plt.show()

Evaluation Metrics

---
Optimization For The CNN Model
---

Models to Apply Layer Optimization

In [None]:
def _build_cnn(conv_filters, hidden_units=None):
    """Assemble one of the candidate CNNs.

    Each entry of ``conv_filters`` adds a 3x3 ReLU convolution followed by
    2x2 max pooling; the first convolution declares the (64, 64, 1) input
    shape. The stack is then flattened, optionally followed by a ReLU dense
    layer with ``hidden_units`` units, and ends in a single sigmoid unit.
    """
    layers = []
    for position, filters in enumerate(conv_filters):
        extra = {"input_shape": (64, 64, 1)} if position == 0 else {}
        layers.append(Conv2D(filters, (3, 3), activation="relu", **extra))
        layers.append(MaxPooling2D((2, 2)))
    layers.append(Flatten())
    if hidden_units is not None:
        layers.append(Dense(hidden_units, activation="relu"))
    layers.append(Dense(1, activation="sigmoid"))
    return Sequential(layers)


# Candidate architectures of increasing depth.
cnn_1 = _build_cnn((32,))
cnn_2 = _build_cnn((32, 64))
cnn_3 = _build_cnn((32, 64, 128))
cnn_4 = _build_cnn((32, 64, 128), hidden_units=128)

Layer Optimization

In [None]:
# Define the models
models = [cnn_1, cnn_2, cnn_3, cnn_4]
model_names = ["CNN 1", "CNN 2", "CNN 3", "CNN 4"]

# Lists to store accuracies for plotting
cnn_accuracies = []
cnn_aug_accuracies = []

# Train and evaluate every architecture twice: once on the plain training set
# and once on the augmented one. Each run uses its own freshly initialised
# clone of the architecture; training the same model instance twice would
# carry the weights learned on the plain data over into the augmented run and
# make the two accuracies incomparable.
for model, model_name in zip(models, model_names):
    # Without augmentation
    print(f"\nTraining {model_name} with non-augmented data:")
    plain_model = clone_model(model)
    plain_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    plain_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))
    loss, accuracy = plain_model.evaluate(X_val, y_val)
    cnn_accuracies.append(accuracy)

    # With augmentation
    print(f"\nTraining {model_name} with augmented data:")
    aug_model = clone_model(model)
    aug_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    aug_model.fit(X_train_aug, y_train_aug, epochs=10, validation_data=(X_val, y_val))
    loss, accuracy = aug_model.evaluate(X_val, y_val)
    cnn_aug_accuracies.append(accuracy)

# Best Models: the architecture with the highest validation accuracy per
# variant (these refer to the entries of `models`, not to the trained clones).
cnn_best_model = models[np.argmax(cnn_accuracies)]
cnn_aug_best_model = models[np.argmax(cnn_aug_accuracies)]
cnn_best_model_name = model_names[np.argmax(cnn_accuracies)]
cnn_aug_best_model_name = model_names[np.argmax(cnn_aug_accuracies)]

Visualization of the Accuracies

In [None]:
# Validation accuracy per CNN architecture, one line per training-set variant.
plt.figure(figsize=(10, 6))
for series, series_label in (
    (cnn_aug_accuracies, "Augmented"),
    (cnn_accuracies, "Non-Augmented"),
):
    plt.plot(model_names, series, marker="o", label=series_label)
plt.xlabel("CNN Models")
plt.ylabel("Accuracy")
plt.title("CNN Models vs Accuracy")
plt.xticks(model_names)
plt.grid(True)
plt.legend()
plt.show()

Evaluation Metrics

In [None]:
# cnn_best_model / cnn_aug_best_model and their names are taken from the
# layer optimization cell above. The hard-coded debugging overrides that used
# to be assigned here were removed so the metrics describe the selected models.

def print_metrics_for_model(model, model_name, X_train, y_train, X_val, y_val):
    """Train a fresh copy of ``model`` and print its validation metrics.

    Args:
        model: Keras model whose architecture is evaluated. It is cloned
            first, so training starts from newly initialised weights and the
            passed-in model is left untouched.
        model_name: Name used in the printed output.
        X_train, y_train: Data the clone is trained on (10 epochs).
        X_val, y_val: Data used for validation during training and for the
            reported metrics.

    Prints the confusion matrix followed by binary recall, precision and F1
    score. Returns nothing.
    """
    # Clone instead of fitting `model` directly: fitting an already trained
    # model would continue from its current weights rather than measure a
    # clean 10-epoch training run.
    fresh_model = clone_model(model)
    fresh_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    fresh_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), verbose=0)
    # The sigmoid output is rounded to 0/1 and reduced to a 1-D integer vector
    # so it has the same form as the label vector.
    y_pred = fresh_model.predict(X_val).round().ravel().astype(int)

    # Confusion Matrix
    cm = confusion_matrix(y_val, y_pred)
    print(f"Confusion Matrix for model={model_name}:\n{cm}")

    # Precision, Recall, and F1 Score
    recall = recall_score(y_val, y_pred, average="binary")
    precision = precision_score(y_val, y_pred, average="binary")
    f1 = f1_score(y_val, y_pred, average="binary")

    print(f"Metrics for model={model_name}: Recall: {recall}, Precision: {precision}, F1 Score: {f1}\n")

# Print metrics for the best non-augmented model, then for the best augmented
# model; both are evaluated on the same validation set.
for heading, best_model, best_name, X_fit, y_fit in (
    ("Metrics for Non-Augmented Model", cnn_best_model, cnn_best_model_name, X_train, y_train),
    ("Metrics for Augmented Model", cnn_aug_best_model, cnn_aug_best_model_name, X_train_aug, y_train_aug),
):
    print(heading)
    print_metrics_for_model(best_model, best_name, X_fit, y_fit, X_val, y_val)

# **PCA**
---

Apply PCA
---

In [None]:
# Normal
# Numbers of principal components to compare. Every value must not exceed
# min(n_samples, n_features) of the data the PCA is fitted on.
n_components = [1, 50, 100, 250, 500, 1000, 2400]

# One PCA per component count, fitted on the plain training set only and then
# applied to both the training and the validation set. random_state is fixed
# so that runs using the randomized SVD solver are reproducible.
X_train_flat_pca = []
X_val_flat_pca = []
for components in n_components:
    pca = PCA(n_components=components, random_state=42)
    pca.fit(X_train_flat)
    X_train_flat_pca.append(pca.transform(X_train_flat))
    X_val_flat_pca.append(pca.transform(X_val_flat))

# Augmentation
# Same procedure, but the PCA is fitted on the augmented training set; the
# (non-augmented) validation set is projected with that PCA.
X_train_aug_flat_pca = []
X_val_aug_flat_pca = []
for components in n_components:
    pca = PCA(n_components=components, random_state=42)
    pca.fit(X_train_aug_flat)
    X_train_aug_flat_pca.append(pca.transform(X_train_aug_flat))
    X_val_aug_flat_pca.append(pca.transform(X_val_flat))


kNN PCA
---

Best kNN model with PCA

In [None]:
# Validation accuracy of the best plain kNN for every PCA projection.
knn_accuracies_pca = []
for train_projection, val_projection in zip(X_train_flat_pca, X_val_flat_pca):
    knn = KNeighborsClassifier(n_neighbors=knn_best_param)
    knn.fit(train_projection, y_train)
    knn_accuracies_pca.append(knn.score(val_projection, y_val))

Best kNN model with augmentation with PCA

In [None]:
# Validation accuracy of the best augmented kNN for every PCA projection.
knn_aug_accuracies_pca = []
for train_projection, val_projection in zip(X_train_aug_flat_pca, X_val_aug_flat_pca):
    knn_aug = KNeighborsClassifier(n_neighbors=knn_aug_best_param)
    knn_aug.fit(train_projection, y_train_aug)
    knn_aug_accuracies_pca.append(knn_aug.score(val_projection, y_val))

In [None]:
# Validation accuracy per number of PCA components (kNN).
plt.figure(figsize=(12, 6))
for series, series_label in (
    (knn_aug_accuracies_pca, "Augmented"),
    (knn_accuracies_pca, "Non-Augmented"),
):
    plt.plot(n_components, series, marker="o", label=series_label)
plt.xlabel("n_components")
plt.ylabel("Accuracy")
plt.title("n_components vs Accuracy (kNN)")
plt.xticks(n_components, rotation=45)
plt.tight_layout()
plt.grid(True)
plt.legend()
plt.show()

SVM PCA
---

Best SVM model with PCA

In [None]:
# Validation accuracy of the best plain SVM for every PCA projection.
svm_accuracies_pca = []
for train_projection, val_projection in zip(X_train_flat_pca, X_val_flat_pca):
    svm = SVC(kernel=svm_best_param).fit(train_projection, y_train)
    accuracy = accuracy_score(y_val, svm.predict(val_projection))
    svm_accuracies_pca.append(accuracy)

Best SVM model with augmentation with PCA

In [None]:
# Validation accuracy of the best augmented SVM for every PCA projection.
svm_aug_accuracies_pca = []
for train_projection, val_projection in zip(X_train_aug_flat_pca, X_val_aug_flat_pca):
    svm_aug = SVC(kernel=svm_aug_best_param).fit(train_projection, y_train_aug)
    accuracy = accuracy_score(y_val, svm_aug.predict(val_projection))
    svm_aug_accuracies_pca.append(accuracy)

In [None]:
# Validation accuracy per number of PCA components (SVM).
plt.figure(figsize=(12, 6))
for series, series_label in (
    (svm_aug_accuracies_pca, "Augmented"),
    (svm_accuracies_pca, "Non-Augmented"),
):
    plt.plot(n_components, series, marker="o", label=series_label)
plt.xlabel("n_components")
plt.ylabel("Accuracy")
plt.title("n_components vs Accuracy (SVM)")
plt.xticks(n_components, rotation=45)
plt.tight_layout()
plt.grid(True)
plt.legend()
plt.show()

Random Forest PCA
---

Best Random Forest with PCA

In [None]:
# Validation accuracy of the best plain Random Forest for every PCA projection.
rf_accuracies_pca = []
for train_projection, val_projection in zip(X_train_flat_pca, X_val_flat_pca):
    rf = RandomForestClassifier(random_state=42, n_estimators=rf_best_param)
    rf.fit(train_projection, y_train)
    accuracy = accuracy_score(y_val, rf.predict(val_projection))
    rf_accuracies_pca.append(accuracy)

Best Random Forest with Augmentation and PCA

In [None]:
# Validation accuracy of the best augmented Random Forest for every PCA
# projection.
rf_aug_accuracies_pca = []
for train_projection, val_projection in zip(X_train_aug_flat_pca, X_val_aug_flat_pca):
    rf_aug = RandomForestClassifier(random_state=42, n_estimators=rf_aug_best_param)
    rf_aug.fit(train_projection, y_train_aug)
    accuracy = accuracy_score(y_val, rf_aug.predict(val_projection))
    rf_aug_accuracies_pca.append(accuracy)

In [None]:
# Validation accuracy per number of PCA components (Random Forest).
plt.figure(figsize=(12, 6))
for series, series_label in (
    (rf_aug_accuracies_pca, "Augmented"),
    (rf_accuracies_pca, "Non-Augmented"),
):
    plt.plot(n_components, series, marker="o", label=series_label)
plt.xlabel("n_components")
plt.ylabel("Accuracy")
plt.title("n_components vs Accuracy (Random Forest)")
plt.xticks(n_components, rotation=45)
plt.tight_layout()
plt.grid(True)
plt.legend()
plt.show()

# **Final Evaluation**
---

Combine Train and Validation Sets
---

In [None]:
# Final training sets: training samples followed by the validation samples,
# joined along the sample axis.
X_final = np.concatenate([X_train, X_val], axis=0)
y_final = np.concatenate([y_train, y_val])

X_final_flat = np.concatenate([X_train_flat, X_val_flat], axis=0)

# Augmented counterparts: augmented training samples plus the (non-augmented)
# validation samples.
X_aug_final = np.concatenate([X_train_aug, X_val], axis=0)
X_aug_final_flat = np.concatenate([X_train_aug_flat, X_val_flat], axis=0)
y_final_aug = np.concatenate([y_train_aug, y_val])

kNN
---

In [None]:
# Best kNN Model without Augmentation
# Refit on train + validation data with the k selected for the plain variant
# and score on the held-out test set.
best_knn = KNeighborsClassifier(n_neighbors=knn_best_param)
best_knn.fit(X_final_flat, y_final)
best_knn_accuracy = best_knn.score(X_test_flat, y_test)

# Best kNN Model with Augmentation
# Uses the k that was selected for the augmented variant (not the plain one).
best_knn_aug = KNeighborsClassifier(n_neighbors=knn_aug_best_param)
best_knn_aug.fit(X_aug_final_flat, y_final_aug)
best_knn_aug_accuracy = best_knn_aug.score(X_test_flat, y_test)

SVM
---

In [None]:
# Best SVM Model without Augmentation
# Uses the kernel that was selected for the plain variant (not the augmented
# one), refits on train + validation data and scores on the test set.
best_svm = SVC(kernel=svm_best_param)
best_svm.fit(X_final_flat, y_final)
y_pred = best_svm.predict(X_test_flat)
best_svm_accuracy = accuracy_score(y_test, y_pred)

# Best SVM Model with Augmentation
best_svm_aug = SVC(kernel=svm_aug_best_param)
best_svm_aug.fit(X_aug_final_flat, y_final_aug)
y_pred = best_svm_aug.predict(X_test_flat)
best_svm_aug_accuracy = accuracy_score(y_test, y_pred)

Random Forest
---

In [None]:
# Best Random Forest Model without Augmentation
# Refit on train + validation data and score on the held-out test set.
best_rf = RandomForestClassifier(random_state=42, n_estimators=rf_best_param).fit(X_final_flat, y_final)
best_rf_accuracy = accuracy_score(y_test, best_rf.predict(X_test_flat))

# Best Random Forest Model with Augmentation
best_rf_aug = RandomForestClassifier(random_state=42, n_estimators=rf_aug_best_param).fit(X_aug_final_flat, y_final_aug)
best_rf_aug_accuracy = accuracy_score(y_test, best_rf_aug.predict(X_test_flat))

CNN
---

In [None]:
# Best CNN Model without Augmentation
# Like the other final-evaluation cells, this uses the architecture selected
# during optimization instead of a hard-coded copy of one candidate.
# clone_model rebuilds the architecture with newly initialised weights, so
# nothing learned in earlier cells leaks into the final training run.
model = clone_model(cnn_best_model)

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_final, y_final, epochs=10)
loss, accuracy = model.evaluate(X_test, y_test)
best_cnn_accuracy = accuracy

# Best CNN Model with Augmentation
model_aug = clone_model(cnn_aug_best_model)

model_aug.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model_aug.fit(X_aug_final, y_final_aug, epochs=10)
loss, accuracy = model_aug.evaluate(X_test, y_test)
best_cnn_aug_accuracy = accuracy

Visualization
---

In [None]:
# Test accuracy of every final model, plotted as labelled points.
models = ["kNN", "kNN-Augmented", "SVM", "SVM-Augmented", "RF", "RF-Augmented", "CNN", "CNN-Augmented"]
best_accuracies = [
    best_knn_accuracy,
    best_knn_aug_accuracy,
    best_svm_accuracy,
    best_svm_aug_accuracy,
    best_rf_accuracy,
    best_rf_aug_accuracy,
    best_cnn_accuracy,
    best_cnn_aug_accuracy,
]

plt.figure(figsize=(12, 6))
plt.scatter(models, best_accuracies, marker="o", s=50)
plt.xlabel("Best Models")
plt.ylabel("Accuracy")
plt.title("Best Models vs Accuracy")
plt.xticks(models, rotation=45)
plt.tight_layout()
plt.grid(True)

# Write each accuracy (four decimals) next to its point.
for xi, yi in zip(models, best_accuracies):
    plt.text(xi, yi, f"({yi:.4f})", ha="right", va="bottom")

plt.show()