# Visual Recognition Mini Project-1
# Part-A

## Dependencies

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog,local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score

## Dataset Preprocessing
Preprocess each image and extract the handcrafted features (here, HOG and LBP) from it.

## Histogram of Oriented Gradients(HOG)
HOG features are extracted by dividing an image into small cells, computing gradient orientations for each cell, and merging these orientations into a histogram. This histogram captures the distribution of gradient directions, forming a feature vector that is useful for object detection.

The function used to do HOG is hog() in skimage. The parameters passed:
- image_gray - Grayscaled image
- Orientations - Number of orientations considered in histogram - 9
- pixels_per_cell - Size of 1 cell - (8,8)
- cells_per_block - Number of cells in 1 block - (2,2)
- feature_vector - Used to return data as a feature vector - True

## Local Binary Pattern(LBP)
LBP features are extracted by comparing each pixel to its neighbors. For each pixel, a binary code is generated by thresholding its neighbors against the center pixel’s value, which captures local texture patterns. These binary codes are converted to decimal and then summarized into a histogram, which represents the image's texture.


The function used to do LBP is local_binary_pattern() in skimage.feature . The parameters passed:
- image_gray - Grayscaled image
- lbp_n_points - Number of circular symmetric points around the main pixel - 8
- lbp_radius - Radius of circle that is considered for texture analysis - 1

In [None]:
def extract_hog_features(image):
    """Return the HOG descriptor of an image as a flat feature vector.

    Args:
        image: Image passed straight to skimage's ``hog``; the caller in this
            file supplies a grayscale image.

    Returns:
        The feature vector produced by ``hog`` with 9 orientations, 8x8-pixel
        cells and 2x2-cell blocks.
    """
    hog_settings = {
        'orientations': 9,
        'pixels_per_cell': (8, 8),
        'cells_per_block': (2, 2),
        'feature_vector': True,  # flatten the block descriptors into 1-D
    }
    return hog(image, **hog_settings)

def extract_lbp_features(image, n_points=8, radius=1):
    """Return the histogram of Local Binary Pattern codes of a grayscale image.

    Args:
        image: 2-D grayscale image passed to skimage's ``local_binary_pattern``.
        n_points: Number of circularly symmetric neighbour points per pixel.
        radius: Radius of the circle on which the neighbours are sampled.

    Returns:
        A 1-D integer array of length ``2 ** n_points`` where entry ``k`` is
        the number of pixels whose LBP code equals ``k``.
    """
    lbp = local_binary_pattern(image, n_points, radius)
    # With the default method every pixel gets an n_points-bit code, i.e. a
    # value in [0, 2**n_points - 1]. Use one bin per possible code; the
    # previous bin edges (0..7) silently discarded every code above 7.
    n_codes = 2 ** n_points
    hist, _ = np.histogram(lbp.ravel(), bins=n_codes, range=(0, n_codes))
    return hist

def extract_features(image):
    """Return the concatenated HOG and LBP feature vector of a BGR image.

    Args:
        image: Colour image in OpenCV's BGR channel order.

    Returns:
        1-D array holding the HOG features followed by the LBP features.
    """
    # Colour is dropped because it does not matter for deciding whether a
    # face is wearing a mask.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    descriptors = (extract_hog_features(gray), extract_lbp_features(gray))
    return np.concatenate(descriptors)

def features_labels(dataset_path):
    """Extract a feature vector and a label for every image in the dataset.

    Args:
        dataset_path: Directory containing the 'with_mask' and 'without_mask'
            sub-folders of images.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays: one feature vector per
        readable image, and the name of the sub-folder it came from.
    """
    data = []
    labels = []

    for label in ['with_mask', 'without_mask']:
        folder = os.path.join(dataset_path, label)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and hence the seeded train/test split) reproducible.
        for file_name in sorted(os.listdir(folder)):
            file_path = os.path.join(folder, file_name)
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals an unreadable or non-image file by
                # returning None instead of raising; skip it rather than
                # crash inside cv2.resize.
                print(f"Skipping unreadable file: {file_path}")
                continue
            # All images must share one size, otherwise the feature vectors
            # differ in length and cannot be stacked into a single array.
            image = cv2.resize(image, (128, 128))
            data.append(extract_features(image))  # combined HOG + LBP features
            labels.append(label)
    return np.array(data), np.array(labels)

# Root directory of the dataset; features_labels() reads the 'with_mask' and
# 'without_mask' sub-folders beneath it.
dataset_path = "/content/drive/MyDrive/Colab Notebooks/dataset/"
X, y = features_labels(dataset_path) #Extract the features and labels from the dataset
# Print the array shapes as a quick sanity check on the extraction.
print("Extracted features shape:", X.shape)
print("Labels shape:", y.shape)

## Label encoding the labels and test-train split

The test-train split is done such that 80% is train and 20% is test, with random state of 42.

In [None]:
# Encode string labels into numerical values for model training
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # classes are sorted, so "with_mask" becomes 0 and "without_mask" becomes 1

# Split the dataset into training and testing sets (80% train, 20% test)
# NOTE(review): no stratify= argument is passed, so the class proportions of
# the two splits are not forced to match — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

## Support Vector Machine(SVM): SVM Classifier Training and Evaluation
- SVM classifier with linear kernel and random state of 42.
- The SVM classifier is trained on X_train,y_train and the labels are predicted for the test data.

In [None]:
# Fit a linear-kernel SVM on the training split and predict the test split.
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)
y_pred_svm = svm_classifier.predict(X_test)
# Fraction of test samples classified correctly (printed below as a percentage).
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("SVM Classifier Accuracy: {:.2f}%".format(svm_accuracy * 100))
print("SVM Classification Report:")
# target_names maps the encoded integers back to the original label strings.
print(classification_report(y_test, y_pred_svm, target_names=label_encoder.classes_))
print("SVM Confusion Matrix:") # Rows are true classes, columns are predicted classes
print(confusion_matrix(y_test, y_pred_svm))

## Multilayer perceptron(MLP): MLP (Neural Network) Classifier Training and Evaluation
Parameters:
- hidden_layer_sizes - number of neurons in the hidden layer(single hidden layer in MLP)- (100,)(default)
- max_iter - maximum number of iterations - 500
- random state - Random weights and bias initialization - 42

In [None]:
# Fit an MLP with default hyperparameters, except for an iteration cap of 500
# and a fixed random_state, then predict the test split.
mlp_classifier = MLPClassifier(max_iter=500, random_state=42)
mlp_classifier.fit(X_train, y_train)
y_pred_mlp = mlp_classifier.predict(X_test)
# Fraction of test samples classified correctly (printed below as a percentage).
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
print("MLP Classifier Accuracy: {:.2f}%".format(mlp_accuracy * 100))
print("MLP Classification Report:")
# target_names maps the encoded integers back to the original label strings.
print(classification_report(y_test, y_pred_mlp, target_names=label_encoder.classes_))
print("MLP Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_mlp))

## Comparison of both classifiers
- Compare the precision score, recall score, F1 score and accuracy of SVM and MLP Classifiers.
- Performance metrics are calculated using macro-average(metrics of both labels are calculated and mean is taken).

In [None]:
# Macro-averaged metrics for the SVM predictions: each metric is computed per
# class and the unweighted mean over the classes is taken.
svm_precision = precision_score(y_test, y_pred_svm, average='macro')
svm_recall = recall_score(y_test, y_pred_svm, average='macro')
svm_f1 = f1_score(y_test, y_pred_svm, average='macro')

# The same macro-averaged metrics for the MLP predictions.
mlp_precision = precision_score(y_test, y_pred_mlp, average='macro')
mlp_recall = recall_score(y_test, y_pred_mlp, average='macro')
mlp_f1 = f1_score(y_test, y_pred_mlp, average='macro')

# One column per metric; row 0 is the SVM and row 1 is the MLP.
results = {
    "Classifier": ["SVM", "MLP"],
    "Accuracy": [svm_accuracy, mlp_accuracy],
    "Precision": [svm_precision, mlp_precision],
    "Recall": [svm_recall, mlp_recall],
    "F1 Score": [svm_f1, mlp_f1]
}

# Tabulate the results side by side for comparison.
results_df = pd.DataFrame(results)
print("Performance Comparison of the Classifiers:")
print(results_df)


# Part B

## Dependencies

In [None]:
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt
import os

## Fix randomness for reproducibility

In [None]:
# Seed every random number generator in use (TensorFlow, NumPy and Python's
# `random` module) with the same value so repeated runs are comparable.
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects child processes — confirm this is intended.
os.environ['PYTHONHASHSEED'] = '42'
# Ask TensorFlow to use deterministic op implementations.
tf.config.experimental.enable_op_determinism()

## Hyperparameters

In [None]:

# Training configuration read as module-level globals by the functions below.
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE; also the model input size
BATCH_SIZE = 32  # batch size of the directory generators
EPOCHS = 200  # number of epochs passed to model.fit
LEARNING_RATE = 1e-4  # learning rate of the Adam optimizer

In [None]:
def load_data(data_dir):
    """Create the training and validation image generators.

    Both generators read ``data_dir`` with ``flow_from_directory`` and an
    80/20 split, yield batches of ``BATCH_SIZE`` images resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` with pixel values rescaled by 1/255, and emit
    binary labels.

    Args:
        data_dir: Dataset directory containing one sub-directory per class.

    Returns:
        A tuple ``(train_generator, validation_generator)``.
    """
    validation_fraction = 0.2

    # Random augmentation is applied to the training subset only.
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=validation_fraction,
    )

    # Validation images are only rescaled: augmenting them would make the
    # validation metrics noisy and not representative of real inputs. The
    # same validation_split is used so that the two subsets stay disjoint.
    validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_fraction,
    )

    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        subset='training',
        seed=42
    )

    validation_generator = validation_datagen.flow_from_directory(
        data_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        subset='validation',
        seed=42
    )

    return train_generator, validation_generator

## Optimized CNN Model

This function builds a **CNN for binary classification** with:
- **4 convolutional blocks** (Conv2D + MaxPooling) for feature extraction.
- **GlobalAveragePooling2D** instead of Flatten to reduce overfitting.
- **256-unit Dense layer + Dropout (0.5)** for better generalization.
- **Sigmoid activation** for binary output.
- Compiled with **Adam optimizer** & `binary_crossentropy` loss.
- Ensures **reproducibility** with `set_random_seed(42)`. 


In [None]:
def create_optimized_model():
    """Build and compile the custom CNN for binary classification.

    The network stacks four blocks, each made of two 3x3 'same'-padded ReLU
    convolutions followed by 2x2 max pooling (32, 64, 128 and 256 filters).
    These are followed by global average pooling, a 256-unit ReLU dense layer
    with 0.5 dropout, and a single sigmoid output unit. The model is compiled
    with Adam at ``LEARNING_RATE``, binary cross-entropy loss and the accuracy
    metric, and its summary is printed.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    tf.keras.utils.set_random_seed(42)

    keras_layers = tf.keras.layers
    conv_options = {'padding': 'same', 'activation': 'relu'}

    stack = []
    for block_index, filters in enumerate((32, 64, 128, 256)):
        # Only the very first layer of the network declares the input shape.
        extra = {'input_shape': (IMG_SIZE, IMG_SIZE, 3)} if block_index == 0 else {}
        stack.append(keras_layers.Conv2D(filters, (3, 3), **conv_options, **extra))
        stack.append(keras_layers.Conv2D(filters, (3, 3), **conv_options))
        stack.append(keras_layers.MaxPooling2D(pool_size=(2, 2)))

    # Global average pooling is used in place of Flatten.
    stack.append(keras_layers.GlobalAveragePooling2D())

    # Fully connected head followed by the sigmoid output unit.
    stack.append(keras_layers.Dense(256, activation='relu'))
    stack.append(keras_layers.Dropout(0.5))
    stack.append(keras_layers.Dense(1, activation='sigmoid'))

    model = tf.keras.Sequential(stack)

    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    return model

## Training History Plot

Plots accuracy and loss curves to monitor training progress and detect overfitting or underfitting.


In [None]:
def plot_history(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each
            holding one value per epoch (as returned by ``model.fit``).
    """
    curves = history.history
    epochs = range(1, len(curves['accuracy']) + 1)

    # One row per panel: training key, validation key, training legend label,
    # validation legend label, y-axis label, panel title.
    panels = (
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
         'Accuracy', 'Training and Validation Accuracy'),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(12, 5))

    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_label, val_label, y_label, title = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, curves[train_key], 'bo-', label=train_label)
        plt.plot(epochs, curves[val_key], 'ro-', label=val_label)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.show()

## Model Training Pipeline

- **train_model**: Trains the model, tracks validation performance, and identifies the best epoch based on validation loss.
- **main**: Loads data, initializes the model, and starts training.


In [None]:
def train_model(model, train_generator, validation_generator):
    """Train the model, report the best validation epoch and plot the curves.

    The model is fitted for ``EPOCHS`` epochs, running every batch of both
    generators once per epoch. The epoch with the lowest validation loss is
    then located, and its loss and accuracy are printed before the training
    history is plotted.

    Args:
        model: Compiled Keras model to train.
        train_generator: Generator yielding the training batches.
        validation_generator: Generator yielding the validation batches.
    """
    fit_options = dict(
        steps_per_epoch=len(train_generator),
        epochs=EPOCHS,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
    )
    history = model.fit(train_generator, **fit_options)

    # The first epoch reaching the minimum validation loss is the best one.
    val_losses = history.history["val_loss"]
    best_epoch, best_val_loss = min(enumerate(val_losses), key=lambda pair: pair[1])
    best_val_acc = history.history["val_accuracy"][best_epoch]

    print(f"\nBest Validation Loss: {best_val_loss:.4f}")
    print(f"Validation Accuracy at Best Loss: {best_val_acc * 100:.2f}%")

    plot_history(history)

def main():
    """Load the 'dataset' directory, build the custom CNN and train it."""
    generators = load_data('dataset')
    cnn = create_optimized_model()
    train_model(cnn, *generators)

# Run the custom-CNN training pipeline when this code is executed as the main
# module rather than imported.
if __name__ == '__main__':
    main()

## Note
- The code below uses the VGG16 model, which is pretrained (rather than a custom architecture), but it showed promising training and validation accuracy.
- It has given a validation accuracy of 99.76% while the above custom optimized CNN model gave validation accuracy of 98.41%.

In [None]:
# Seed every random number generator in use (TensorFlow, NumPy and Python's
# `random` module) with the same value so repeated runs are comparable.
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects child processes — confirm this is intended.
os.environ['PYTHONHASHSEED'] = '42'
# Ask TensorFlow to use deterministic op implementations.
tf.config.experimental.enable_op_determinism()



# Training configuration for the VGG16 experiment; these assignments rebind
# the module-level globals read by the functions below.
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE; also the model input size
BATCH_SIZE = 16  # batch size of the directory generators
EPOCHS = 150  # number of epochs passed to model.fit



def load_data(data_dir):
    """Create the training and validation image generators.

    Both generators read ``data_dir`` with ``flow_from_directory`` and an
    80/20 split, yield batches of ``BATCH_SIZE`` images resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` with pixel values rescaled by 1/255, and emit
    binary labels.

    Args:
        data_dir: Dataset directory containing one sub-directory per class.

    Returns:
        A tuple ``(train_generator, validation_generator)``.
    """
    validation_fraction = 0.2

    # Random augmentation is applied to the training subset only.
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=validation_fraction,
    )

    # Validation images are only rescaled: augmenting them would make the
    # validation metrics noisy and not representative of real inputs. The
    # same validation_split is used so that the two subsets stay disjoint.
    validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_fraction,
    )

    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        subset='training',
        seed=42
    )

    validation_generator = validation_datagen.flow_from_directory(
        data_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        subset='validation',
        seed=42
    )

    return train_generator, validation_generator
    
def create_model():
    """Build and compile a binary classifier on top of a frozen VGG16 base.

    The VGG16 convolutional base is loaded with ImageNet weights and without
    its classification head, and every one of its layers is marked
    non-trainable. A new head is attached: Flatten, a 256-unit ReLU dense
    layer, 0.5 dropout and a single sigmoid output unit. The model is
    compiled with Adam (learning rate 1e-4), binary cross-entropy loss and
    the accuracy metric, and its summary is printed.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    backbone = tf.keras.applications.VGG16(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    tf.keras.utils.set_random_seed(42)

    # Freeze the pretrained layers so that only the new head is trained.
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False

    keras_layers = tf.keras.layers
    flattened = keras_layers.Flatten()(backbone.output)
    hidden = keras_layers.Dense(256, activation='relu')(flattened)
    regularized = keras_layers.Dropout(0.5)(hidden)
    prediction = keras_layers.Dense(1, activation='sigmoid')(regularized)

    model = tf.keras.Model(inputs=backbone.input, outputs=prediction)

    adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()

    return model

def plot_history(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each
            holding one value per epoch (as returned by ``model.fit``).
    """
    curves = history.history
    epochs = range(1, len(curves['accuracy']) + 1)

    # One row per panel: training key, validation key, training legend label,
    # validation legend label, y-axis label, panel title.
    panels = (
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
         'Accuracy', 'Training and Validation Accuracy'),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(12, 5))

    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_label, val_label, y_label, title = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, curves[train_key], 'bo-', label=train_label)
        plt.plot(epochs, curves[val_key], 'ro-', label=val_label)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.show()

def train_model(model, train_generator, validation_generator):
    """Train the model, report the best validation epoch and plot the curves.

    The model is fitted for ``EPOCHS`` epochs, running every batch of both
    generators once per epoch. The epoch with the lowest validation loss is
    then located, and its loss and accuracy are printed before the training
    history is plotted.

    Args:
        model: Compiled Keras model to train.
        train_generator: Generator yielding the training batches.
        validation_generator: Generator yielding the validation batches.
    """
    fit_options = dict(
        steps_per_epoch=len(train_generator),
        epochs=EPOCHS,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
    )
    history = model.fit(train_generator, **fit_options)

    # The first epoch reaching the minimum validation loss is the best one.
    val_losses = history.history["val_loss"]
    best_epoch, best_val_loss = min(enumerate(val_losses), key=lambda pair: pair[1])
    best_val_acc = history.history["val_accuracy"][best_epoch]

    print(f"\nBest Validation Loss: {best_val_loss:.4f}")
    print(f"Validation Accuracy at Best Loss: {best_val_acc * 100:.2f}%")

    plot_history(history)

    
def main():
    """Load the 'dataset' directory, build the VGG16-based model and train it."""
    generators = load_data('dataset')
    vgg_model = create_model()
    train_model(vgg_model, *generators)
    
# Run the VGG16 training pipeline (the `main` defined just above) when this
# code is executed as the main module rather than imported.
if __name__ == '__main__':
    main()

