<a href="https://colab.research.google.com/github/prarthanaaaaa12/knn_classification/blob/main/1RUA24CSE0322_LAB2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Neural Network for Cars vs Bikes Classification
## Binary Image Classification Assignment

This notebook implements a deep neural network from scratch to classify images of cars and bikes.

## Step 1: Upload and Extract the Dataset

First, upload your zip file containing Cars and Bikes folders.

In [None]:
from google.colab import files
import zipfile
import os

# Upload the zip file
print("Please upload your zip file containing Cars and Bikes folders:")
uploaded = files.upload()

# Fail with a clear message (instead of an IndexError below) when nothing was uploaded
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select the dataset zip file.")

# Get the filename (the first uploaded file is taken as the dataset archive)
zip_filename = list(uploaded.keys())[0]

# Ensure the 'dataset' directory is clean before extracting, so files from a
# previous upload cannot leak into this run
if os.path.exists('dataset'):
    import shutil
    shutil.rmtree('dataset')
os.makedirs('dataset', exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall('dataset')

print("\nDataset extracted successfully!")
print("\nFolder structure:")
# NOTE: the loop variable is deliberately NOT called `files`; rebinding that name
# would replace the google.colab `files` module used for uploads in later cells.
for root, dirs, filenames in os.walk('dataset'):
    level = root.replace('dataset', '').count(os.sep)
    indent = ' ' * 2 * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 2 * (level + 1)
    for filename in filenames[:5]:  # Show first 5 files
        print(f"{subindent}{filename}")
    if len(filenames) > 5:
        print(f"{subindent}... and {len(filenames) - 5} more files")

Please upload your zip file containing Cars and Bikes folders:


## Step 2: Import Required Libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from glob import glob
from sklearn.model_selection import train_test_split
import random

# Pin the global RNG state of both Python's `random` module and NumPy so that
# sampling and shuffling give the same results on every run
random.seed(42)
np.random.seed(42)

print("Libraries imported successfully!")

Libraries imported successfully!


## Step 3: Upload the Utility Functions File

Upload the `dnn_app_utils_v3.py` file.

In [8]:
# Bind `files` to the Colab upload helper module here, so that `files.upload()`
# below works even if the name `files` was assigned another value by an earlier cell.
import google.colab.files as files

# Upload the utility file
print("Please upload dnn_app_utils_v3.py file:")
uploaded_utils = files.upload()

# Import the utility functions
# NOTE(review): the wildcard import hides which names come from the module. Later
# cells call initialize_parameters_deep, L_model_forward, compute_cost,
# L_model_backward, update_parameters and predict, which are presumably defined
# there -- confirm, and consider importing them explicitly.
from dnn_app_utils_v3 import *

print("\nUtility functions imported successfully!")

Please upload dnn_app_utils_v3.py file:


Saving dnn_app_utils_v3.py to dnn_app_utils_v3.py

Utility functions imported successfully!


## Step 4: Load and Preprocess Images

This step:
- Loads all images from Cars and Bikes folders
- Resizes them to 64x64 pixels
- Normalizes pixel values to [0, 1]
- Assigns labels (0 for Cars, 1 for Bikes)

In [9]:
def load_and_preprocess_data(base_path='dataset', img_size=(64, 64)):
    """
    Load and preprocess images from Cars and Bikes folders

    The class folders are searched for anywhere below base_path (to cope with
    archives that wrap the data in extra folders). A folder whose name contains
    "car" (case-insensitive) is used for cars, one containing "bike" for bikes.

    Arguments:
    base_path -- path to the dataset folder
    img_size -- tuple (height, width) for resizing images

    Returns:
    X -- numpy array of shape (img_size[0] * img_size[1] * 3, number of examples),
         pixel values scaled to [0, 1]
    Y -- numpy array of shape (1, number of examples) containing labels
         (0 = car, 1 = bike); car examples come first, then bike examples

    Raises:
    ValueError -- if the Cars/Bikes folders cannot be found, or if no image in
                  them could be read
    """
    cars_path, bikes_path = _find_class_dirs(base_path)

    if not cars_path or not bikes_path:
        raise ValueError("Could not find Cars and Bikes folders in the dataset")

    print(f"Cars folder: {cars_path}")
    print(f"Bikes folder: {bikes_path}")

    # Get all image paths
    cars_images = _list_image_paths(cars_path)
    bikes_images = _list_image_paths(bikes_path)

    print(f"\nFound {len(cars_images)} car images")
    print(f"Found {len(bikes_images)} bike images")

    # Load and process car images (label = 0), then bike images (label = 1)
    print("\nLoading car images...")
    car_arrays = _load_images(cars_images, img_size)
    print("Loading bike images...")
    bike_arrays = _load_images(bikes_images, img_size)

    images = car_arrays + bike_arrays
    labels = [0] * len(car_arrays) + [1] * len(bike_arrays)

    # Without this guard an empty dataset only fails later with an opaque reshape error
    if not images:
        raise ValueError("No readable images were found in the Cars and Bikes folders")

    # Convert to numpy arrays
    X = np.array(images)
    Y = np.array(labels).reshape(1, -1)

    print(f"\nTotal images loaded: {X.shape[0]}")
    print(f"Image shape: {X.shape[1:]}")

    # Normalize pixel values to [0, 1]
    X = X / 255.0

    # Flatten images: (num_examples, height, width, channels) -> (height*width*channels, num_examples)
    X = X.reshape(X.shape[0], -1).T

    print(f"\nFinal X shape: {X.shape}")
    print(f"Final Y shape: {Y.shape}")
    print(f"X range: [{X.min():.2f}, {X.max():.2f}]")

    return X, Y


def _find_class_dirs(base_path):
    """Return (cars_path, bikes_path) located below base_path; either may be None."""
    cars_path = None
    bikes_path = None

    for root, dirs, _ in os.walk(base_path):
        # Prune in place so os.walk never descends into '__MACOSX' (archive
        # metadata that mirrors the real folder names but holds no readable
        # images) or hidden folders. Sorting makes the traversal deterministic.
        dirs[:] = sorted(d for d in dirs if d != '__MACOSX' and not d.startswith('.'))
        for dir_name in dirs:
            lowered = dir_name.lower()
            # As traversal is top-down, a deeper match replaces a shallower one
            if 'car' in lowered:
                cars_path = os.path.join(root, dir_name)
            elif 'bike' in lowered:
                bikes_path = os.path.join(root, dir_name)

    return cars_path, bikes_path


def _list_image_paths(folder, extensions=('.jpg', '.jpeg', '.png')):
    """Return sorted paths of the non-hidden image files directly inside folder (extension match is case-insensitive)."""
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if not name.startswith('.') and name.lower().endswith(extensions)
    )


def _load_images(image_paths, img_size):
    """Read each path as RGB and resize it to img_size=(height, width); unreadable files are reported and skipped."""
    loaded = []
    for img_path in image_paths:
        try:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable or corrupt file by returning None
                print(f"Skipping unreadable image: {img_path}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
            # cv2.resize takes its target size as (width, height), the reverse of img_size
            img = cv2.resize(img, (img_size[1], img_size[0]))
            loaded.append(img)
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
    return loaded

# Load the data
# X holds one flattened, [0, 1]-scaled image per column; Y holds the matching
# labels (0 = car, 1 = bike) in a single row.
X, Y = load_and_preprocess_data()

ValueError: Could not find Cars and Bikes folders in the dataset

## Step 5: Visualize Sample Images

In [5]:
# Visualize some sample images
def visualize_samples(X, Y, num_samples=10, img_shape=(64, 64, 3)):
    """
    Visualize random samples from the dataset

    Arguments:
    X -- image data, one flattened image per column
    Y -- labels of shape (1, number of examples); 1 is shown as "Bike", anything else as "Car"
    num_samples -- how many images to draw; capped at the number of available examples
    img_shape -- (height, width, channels) used to un-flatten each column of X

    Returns:
    None -- the figure is shown as a side effect
    """
    available = X.shape[1]
    # Sampling without replacement fails if more samples are requested than exist
    num_samples = min(num_samples, available)
    if num_samples <= 0:
        print("No samples available to visualize.")
        return

    indices = np.random.choice(available, num_samples, replace=False)

    # Up to 5 images per row; the row count grows with num_samples (2 x 5 for the default 10)
    n_cols = min(5, num_samples)
    n_rows = (num_samples + n_cols - 1) // n_cols

    plt.figure(figsize=(3 * n_cols, 3 * n_rows))
    for i, idx in enumerate(indices):
        plt.subplot(n_rows, n_cols, i + 1)
        img = X[:, idx].reshape(img_shape)
        plt.imshow(img)
        label = "Bike" if Y[0, idx] == 1 else "Car"
        plt.title(f"{label}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Show a random selection of images with their labels (num_samples defaults to 10)
visualize_samples(X, Y)

NameError: name 'X' is not defined

## Step 6: Shuffle and Split Dataset (80:20 ratio)

Split the data into training (80%) and testing (20%) sets.

In [6]:
def shuffle_and_split(X, Y, test_size=0.2, random_state=42):
    """
    Randomly partition the dataset into a training part and a testing part,
    keeping the class proportions equal in both (stratified split).

    Arguments:
    X -- input features, one example per column
    Y -- labels, one example per column
    test_size -- fraction of examples placed in the test set (0.2 gives an 80:20 split)
    random_state -- seed forwarded to train_test_split so the split is repeatable

    Returns:
    X_train, Y_train, X_test, Y_test -- each with one example per column
    """
    # train_test_split works on rows-as-examples, so hand it the transposed
    # arrays and flip every resulting piece back to columns-as-examples.
    row_features = X.T
    row_labels = Y.T

    pieces = train_test_split(
        row_features,
        row_labels,
        test_size=test_size,
        random_state=random_state,
        stratify=row_labels,
    )
    train_features, test_features, train_labels, test_labels = (piece.T for piece in pieces)

    return train_features, train_labels, test_features, test_labels

# Split the data (defaults: 20% test, random_state=42, stratified on the labels)
X_train, Y_train, X_test, Y_test = shuffle_and_split(X, Y)

# Examples are stored column-wise, so shape[1] is the example count
print(f"Training set: {X_train.shape[1]} examples")
print(f"Testing set: {X_test.shape[1]} examples")
# Per-class counts (label 0 = car, label 1 = bike) to check the split stayed balanced
print(f"\nTraining set - Cars: {np.sum(Y_train == 0)}, Bikes: {np.sum(Y_train == 1)}")
print(f"Testing set - Cars: {np.sum(Y_test == 0)}, Bikes: {np.sum(Y_test == 1)}")

NameError: name 'X' is not defined

## Step 7: Define the Deep Neural Network Architecture

Network structure:
- Input layer: 64 × 64 × 3 = 12,288 features
- Hidden layer 1: 20 units with ReLU activation
- Hidden layer 2: 7 units with ReLU activation
- Hidden layer 3: 5 units with ReLU activation
- Output layer: 1 unit with Sigmoid activation (binary classification)

In [7]:
# Define network architecture
n_x = X_train.shape[0]  # Input layer size (12288 for 64x64x3 images)
n_h1 = 20               # Hidden layer 1 size
n_h2 = 7                # Hidden layer 2 size
n_h3 = 5                # Hidden layer 3 size
n_y = 1                 # Output layer size (binary classification)

# Layer sizes in order, input first; this list is passed to L_layer_model for training
layers_dims = [n_x, n_h1, n_h2, n_h3, n_y]

print("Neural Network Architecture:")
print(f"Input Layer: {layers_dims[0]} units")
# Indices 1 .. len-2 are the hidden layers (first and last entries are input and output)
for i in range(1, len(layers_dims)-1):
    print(f"Hidden Layer {i}: {layers_dims[i]} units (ReLU activation)")
print(f"Output Layer: {layers_dims[-1]} unit (Sigmoid activation)")
# NOTE: len(layers_dims) counts the input layer as well, so this prints 5 for the
# three hidden layers plus one output layer defined above.
print(f"\nTotal layers: {len(layers_dims)}")

NameError: name 'X_train' is not defined

## Step 8: Build and Train the Deep Neural Network

This implements:
- Forward propagation
- Cost computation (cross-entropy loss)
- Backward propagation
- Parameter updates using gradient descent

In [None]:
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=2500, print_cost=True):
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector, of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model
    costs -- list of costs recorded every 100 iterations (iteration 0, 100, 200, ...),
             regardless of print_cost; each is the cost computed before that
             iteration's parameter update
    """

    # Re-seed NumPy's global RNG on every call so repeated training runs start
    # from the same state
    np.random.seed(1)
    costs = []

    # Initialize parameters
    parameters = initialize_parameters_deep(layers_dims)

    # Training loop
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID
        AL, caches = L_model_forward(X, parameters)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = L_model_backward(AL, Y, caches)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Record the cost every 100 iterations. Recording is independent of
        # print_cost so that the returned history is usable (e.g. for plotting)
        # even when training silently.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost:.6f}")

    return parameters, costs

# Train the model
# The learning rate and iteration count are passed explicitly here (same values
# as the function defaults) so they are easy to tune from this cell.
print("Training the Deep Neural Network...\n")
parameters, costs = L_layer_model(
    X_train, Y_train,
    layers_dims,
    learning_rate=0.0075,
    num_iterations=2500,
    print_cost=True
)

print("\nTraining completed!")

## Step 9: Plot Loss vs Epochs

In [None]:
# Plot the cost
# `costs` holds one value per 100 training iterations, so position k on the
# x-axis corresponds to iteration 100 * k.
plt.figure(figsize=(10, 6))
plt.plot(np.squeeze(costs))
plt.ylabel('Cost (Cross-Entropy Loss)', fontsize=12)
plt.xlabel('Iterations (per hundreds)', fontsize=12)
plt.title('Loss vs Epochs - Deep Neural Network Training', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# costs[-1] is the last *recorded* cost (iteration 2400 when training for 2500
# iterations), not the cost after the very last update.
# These lines assume `costs` is non-empty and costs[0] is non-zero.
print(f"Initial cost: {costs[0]:.6f}")
print(f"Final cost: {costs[-1]:.6f}")
print(f"Cost reduction: {((costs[0] - costs[-1]) / costs[0] * 100):.2f}%")

## Step 10: Evaluate Model Performance

Generate predictions and compute accuracy on both training and testing sets.

In [None]:
# Predictions on training set
# NOTE(review): `predict` is not defined in this notebook; it presumably comes from
# the dnn_app_utils_v3 wildcard import and reports the accuracy itself -- confirm.
print("Training Set Performance:")
print("-" * 40)
pred_train = predict(X_train, Y_train, parameters)

print("\n" + "="*40 + "\n")

# Predictions on test set
# pred_test is reused by the prediction-visualization cell further down
print("Testing Set Performance:")
print("-" * 40)
pred_test = predict(X_test, Y_test, parameters)

## Step 11: Visualize Predictions on Test Set

In [None]:
# Visualize some predictions
def visualize_predictions(X, Y, predictions, num_samples=10, img_shape=(64, 64, 3)):
    """
    Visualize predictions with true labels

    Arguments:
    X -- image data, one flattened image per column
    Y -- true labels of shape (1, number of examples); 1 is shown as "Bike", anything else as "Car"
    predictions -- predicted labels, indexed the same way as Y
    num_samples -- how many images to draw; capped at the number of available examples
    img_shape -- (height, width, channels) used to un-flatten each column of X

    Returns:
    None -- the figure is shown as a side effect; titles are green for correct
            predictions and red for incorrect ones
    """
    available = X.shape[1]
    # Sampling without replacement fails if more samples are requested than exist
    num_samples = min(num_samples, available)
    if num_samples <= 0:
        print("No samples available to visualize.")
        return

    indices = np.random.choice(available, num_samples, replace=False)

    # Up to 5 images per row; the row count grows with num_samples (2 x 5 for the default 10)
    n_cols = min(5, num_samples)
    n_rows = (num_samples + n_cols - 1) // n_cols

    plt.figure(figsize=(3 * n_cols, 3 * n_rows))
    for i, idx in enumerate(indices):
        plt.subplot(n_rows, n_cols, i + 1)
        img = X[:, idx].reshape(img_shape)
        plt.imshow(img)

        true_label = "Bike" if Y[0, idx] == 1 else "Car"
        pred_label = "Bike" if predictions[0, idx] == 1 else "Car"

        color = 'green' if true_label == pred_label else 'red'
        plt.title(f"True: {true_label}\nPred: {pred_label}", color=color, fontweight='bold')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Show 10 random test images with their true and predicted labels; pred_test
# comes from the evaluation cell above.
print("Sample predictions from test set:")
print("Green = Correct, Red = Incorrect\n")
visualize_predictions(X_test, Y_test, pred_test, num_samples=10)

## Step 12: Summary and Final Results

In [None]:
# Calculate and display detailed metrics
def display_summary(X_train, Y_train, X_test, Y_test, parameters, costs, layers_dims):
    """
    Display comprehensive summary of the model

    Arguments:
    X_train, X_test -- feature arrays, one example per column
    Y_train, Y_test -- label arrays of shape (1, number of examples)
    parameters -- trained parameters, passed through to predict
    costs -- sequence of costs recorded during training (may be empty)
    layers_dims -- list of layer sizes, input layer first and output layer last

    Returns:
    None -- the summary is printed
    """
    # Calculate accuracies as the percentage of predictions equal to the labels
    pred_train = predict(X_train, Y_train, parameters)
    train_accuracy = np.mean(pred_train == Y_train) * 100

    pred_test = predict(X_test, Y_test, parameters)
    test_accuracy = np.mean(pred_test == Y_test) * 100

    # Report the split that was actually used rather than assuming 80:20
    n_train = X_train.shape[1]
    n_test = X_test.shape[1]
    n_total = n_train + n_test
    train_pct = 100 * n_train / n_total if n_total else 0.0
    test_pct = 100 * n_test / n_total if n_total else 0.0

    print("="*60)
    print("DEEP NEURAL NETWORK - FINAL SUMMARY")
    print("="*60)

    print("\n📊 DATASET INFORMATION:")
    print("-" * 60)
    print(f"Total samples: {n_total}")
    print(f"Training samples: {n_train} ({train_pct:.0f}%)")
    print(f"Testing samples: {n_test} ({test_pct:.0f}%)")
    print(f"Input features: {X_train.shape[0]} (64×64×3 RGB images)")

    print("\n🧠 NETWORK ARCHITECTURE:")
    print("-" * 60)
    print(f"Input Layer: {layers_dims[0]} units")
    for i in range(1, len(layers_dims)-1):
        print(f"Hidden Layer {i}: {layers_dims[i]} units (ReLU)")
    print(f"Output Layer: {layers_dims[-1]} unit (Sigmoid)")

    print("\n📈 TRAINING RESULTS:")
    print("-" * 60)
    if len(costs) > 0:
        initial_loss = costs[0]
        final_loss = costs[-1]
        print(f"Initial Loss: {initial_loss:.6f}")
        print(f"Final Loss: {final_loss:.6f}")
        # A relative reduction is undefined when the initial loss is zero
        if initial_loss != 0:
            print(f"Loss Reduction: {((initial_loss - final_loss) / initial_loss * 100):.2f}%")
    else:
        print("No cost history was recorded during training.")

    print("\n🎯 MODEL PERFORMANCE:")
    print("-" * 60)
    print(f"Training Accuracy: {train_accuracy:.2f}%")
    print(f"Testing Accuracy: {test_accuracy:.2f}%")

    # The overfitting warning takes precedence over the quality rating
    if train_accuracy - test_accuracy > 10:
        print("\n⚠️  Note: Significant gap between training and testing accuracy")
        print("    suggests possible overfitting. Consider regularization.")
    elif test_accuracy > 90:
        print("\n✅ Excellent model performance!")
    elif test_accuracy > 75:
        print("\n✓ Good model performance.")
    else:
        print("\n⚠️  Model may need more training or architecture adjustments.")

    print("\n" + "="*60)

# Display the summary
# Note: display_summary calls predict again for both splits instead of reusing
# pred_train / pred_test from the evaluation cell.
display_summary(X_train, Y_train, X_test, Y_test, parameters, costs, layers_dims)

## Bonus: Test on Custom Image (Optional)

Upload your own car or bike image to test the model!

In [None]:
def test_custom_image(parameters, img_size=(64, 64)):
    """
    Test the model on a custom uploaded image

    Prompts for an upload, shows the resized image, and prints the predicted
    class with its confidence.

    Arguments:
    parameters -- trained model parameters, passed to L_model_forward
    img_size -- tuple (height, width) the image is resized to; it must match the
                size the model was trained on (64x64 by default)

    Returns:
    None -- results are displayed and printed

    Raises:
    ValueError -- if the uploaded file cannot be decoded as an image
    """
    # Imported locally so the function works no matter what the notebook-level
    # name `files` currently refers to.
    from google.colab import files as colab_files

    print("Upload an image of a car or bike:")
    uploaded_img = colab_files.upload()

    # Nothing to classify if no file was uploaded
    if not uploaded_img:
        print("No image was uploaded.")
        return

    img_name = list(uploaded_img.keys())[0]

    # Load and preprocess the image
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread returns None instead of raising when the file is not a readable image
        raise ValueError(f"Could not read '{img_name}' as an image")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.resize takes its target size as (width, height), the reverse of img_size
    img_resized = cv2.resize(img_rgb, (img_size[1], img_size[0]))

    # Display the image
    plt.figure(figsize=(6, 6))
    plt.imshow(img_resized)
    plt.axis('off')
    plt.title("Uploaded Image", fontsize=14, fontweight='bold')
    plt.show()

    # Prepare for prediction: scale to [0, 1] and flatten into a single column,
    # i.e. a one-example input for the network
    img_normalized = img_resized / 255.0
    img_flattened = img_normalized.reshape(-1, 1)

    # Make prediction
    probas, _ = L_model_forward(img_flattened, parameters)
    bike_probability = probas[0, 0]
    prediction = 1 if bike_probability > 0.5 else 0
    # Confidence is the probability assigned to whichever class was chosen
    confidence = bike_probability if prediction == 1 else (1 - bike_probability)

    label = "Bike" if prediction == 1 else "Car"

    print(f"\n🔍 Prediction: {label}")
    print(f"📊 Confidence: {confidence * 100:.2f}%")

# Uncomment the line below to test on custom image
# test_custom_image(parameters)