
## Run 3:
You should try to develop the best classifiers you can! You can choose whatever feature, encoding and classifier you like. Potential features: the GIST feature; Dense SIFT; Dense SIFT in a Gaussian Pyramid; Dense SIFT with spatial pooling (commonly known as PHOW - Pyramid Histogram of Words), etc. Potential classifiers: Naive bayes; non-linear SVM (perhaps using a linear classifier with a Homogeneous Kernel Map), ...

In [47]:
# Import packages
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from skimage.feature import match_descriptors
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from skimage.feature import hog, daisy
from sklearn.cluster import MiniBatchKMeans
import glob
from sklearn.naive_bayes import GaussianNB

# Function to extract Dense SIFT features
def extract_dense_sift_features(image):
    step = 16  # Step size for Dense SIFT
    sift = cv2.SIFT_create()
    keypoints = [cv2.KeyPoint(x, y, step) for y in range(0, image.shape[0], step) 
                                             for x in range(0, image.shape[1], step)]
    _, descriptors = sift.compute(image, keypoints)
    return descriptors

# Function to perform clustering (KMeans) on Dense SIFT features
def cluster_features(features, num_clusters=128):
    kmeans = MiniBatchKMeans(n_clusters=num_clusters, batch_size=100, random_state=42)
    kmeans.fit(features)
    return kmeans

# Function to encode features using bag-of-visual-words (BoVW) approach
def encode_features(features, kmeans):
    # Predict the closest cluster for each feature
    labels = kmeans.predict(features)
    # Compute histogram of visual words
    hist, _ = np.histogram(labels, bins=np.arange(kmeans.n_clusters + 1))
    return hist
# load the dataset after getting the features
def load_data_from_directory(directory):
    features = []
    labels = []
    all_class_names = []
    for class_name in os.listdir(directory):
        if class_name == '.DS_Store':  # Skip .DS_Store files
            continue
        class_dir = os.path.join(directory, class_name)
        all_class_names.append(class_name)
        print("Processing class:", class_name) #Process all the classes
        for filename in glob.glob(os.path.join(class_dir, "*.jpg")):
            image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            if image is not None:
                # Extract Dense SIFT features
                dense_sift_features = extract_dense_sift_features(image)
                features.extend(dense_sift_features)
                labels.append(class_name)
            else:
                print("Failed to load image:", filename)

    # Cluster the Dense SIFT features
    kmeans = cluster_features(np.array(features))
    
    # Encode features using BoVW approach
    features = []
    for class_name in os.listdir(directory):
        if class_name == '.DS_Store':  # Skip .DS_Store files
            continue
        class_dir = os.path.join(directory, class_name)
        for filename in glob.glob(os.path.join(class_dir, "*.jpg")):
            image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            if image is not None:
                # Extract Dense SIFT features
                dense_sift_features = extract_dense_sift_features(image)
                # Encode features using BoVW approach
                encoded_features = encode_features(dense_sift_features, kmeans)
                features.append(encoded_features)
            else:
                print("Failed to load image:", filename)
    label_encoder.fit(all_class_names)
    labels = label_encoder.transform(labels)
    return np.array(features), np.array(labels),kmeans

# Initialize a LabelEncoder object
label_encoder = LabelEncoder()
# Define the directory containing the training data
train_dir = "training\\training"
# Load the training data and perform clustering using KMeans
X_train, y_train,kmeans = load_data_from_directory(train_dir)

Processing class: bedroom
Processing class: Coast
Processing class: Forest
Processing class: Highway
Processing class: industrial
Processing class: Insidecity
Processing class: kitchen
Processing class: livingroom
Processing class: Mountain
Processing class: Office
Processing class: OpenCountry
Processing class: store
Processing class: Street
Processing class: Suburb
Processing class: TallBuilding


In [48]:
# Print the shape of the training data (features and labels)
print("Training data:", X_train.shape, y_train.shape)
# Print the number of classes encoded by the LabelEncoder
print("class_num: ", label_encoder.classes_)

Training data: (1500, 128) (1500,)
class_num:  ['Coast' 'Forest' 'Highway' 'Insidecity' 'Mountain' 'Office' 'OpenCountry'
 'Street' 'Suburb' 'TallBuilding' 'bedroom' 'industrial' 'kitchen'
 'livingroom' 'store']


In [49]:
from sklearn.naive_bayes import GaussianNB
# scaling and normalizing the data
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(X_train)

# Train non-linear SVM classifier
svm_classifier = SVC(kernel='rbf', C=10, gamma='scale')
svm_classifier.fit(X_train, y_train)


In [50]:
# Convert lists to NumPy arrays
features = np.array(train_features_scaled)
labels = np.array(y_train)

In [51]:
# Evaluate classifiers
def evaluate_classifiers(classifiers, X_test, y_test):
    predictions = np.zeros((len(X_test), len(classifiers)))
    for i, (_, classifier) in enumerate(classifiers.items()):
        predictions[:, i] = classifier.predict(X_test)
    # Predicted class label is the one with maximum score
    predicted_labels = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(y_test, predicted_labels)
    return accuracy
    
# Split the data into training and evaluation sets
x_train, X_eval, Y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=42)
print("Train data:", X_train.shape, y_train.shape)
print("Eval data:", X_eval.shape, y_eval.shape)

# Train one-vs-all linear classifiers
classifiers = {}
for class_label in range(15):
        # Create binary labels for the current class vs the rest
        binary_labels = (Y_train == class_label).astype(int)
        # Train a Naive Bayes classifier for the current class
        classifier = SVC(kernel='rbf', C=10, gamma='scale')
        classifier.fit(x_train, binary_labels)
        classifiers[class_label] = classifier

# Evaluate classifiers
test_accuracy = evaluate_classifiers(classifiers, X_eval, y_eval)
print("Test Accuracy:", test_accuracy)

Train data: (1500, 128) (1500,)
Eval data: (300, 128) (300,)
Test Accuracy: 0.4533333333333333


In [52]:
# Load the test data
def load_test_data(test_folder, kmeans):
    X_test = []
    for filename in os.listdir(test_folder):
        img_path = os.path.join(test_folder, filename)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (256, 256))
        dense_sift_features = extract_dense_sift_features(image) 
        encoded_features = encode_features(dense_sift_features, kmeans)  # Encode features using the same KMeans model
        X_test.append(encoded_features)
    return np.array(X_test)

# Function to encode features using bag-of-visual-words (BoVW) approach
def encode_features(features, kmeans):
    # Predict the closest cluster for each feature
    labels = kmeans.predict(features)
    # Compute histogram of visual words
    hist, _ = np.histogram(labels, bins=np.arange(kmeans.n_clusters + 1))
    return hist

# Define the path to the directory containing the test images
test_folder = "testing\\testing"
# Load the test data and encode the features using the pre-trained KMeans model
X_test = load_test_data(test_folder, kmeans)  
# Print the shape of the test data to verify the number of test samples
print("Test data:", X_test.shape)

Test data: (2985, 128)


In [53]:
# Scaling and normalizing the data
scaler = StandardScaler()
test_features_scaled = scaler.fit_transform(X_test)
features_test = np.array(test_features_scaled)

In [54]:
# Print shape of test data
print("Test data:", features_test.shape)

Test data: (2985, 128)


In [55]:
# Evaluate classifiers
def evaluate_classifiers(classifiers, X_test):
    predictions = np.zeros((len(X_test), len(classifiers)))
    for i, (_, classifier) in enumerate(classifiers.items()):
        predictions[:, i] = classifier.predict(X_test)
    # Predicted class label is the one with maximum score
    predicted_labels = np.argmax(predictions, axis=1)
    return predicted_labels

In [56]:
# Initialize an empty list to store the predictions
predictions = []
prediction = np.zeros((len(features_test), len(classifiers)))
# Iterate over each classifier and make predictions on the test features
for i, (_, classifier) in enumerate(classifiers.items()):
        prediction[:, i] = classifier.predict(features_test)
# Predicted class label is the one with maximum score
predictions = np.argmax(prediction, axis=1)

In [57]:
# Print the predicted class label for the 1350th test sample
print(predictions[1350])
# Inverse transform the predicted class label to get the original class name
print(label_encoder.inverse_transform([predictions[1350]])[0])

12
kitchen


In [58]:
# Save the predictions to a run3.txt file
test_dir = "testing\\testing"
output_file = "run3.txt"
with open(output_file, 'w') as f:
    for filename, predicted_class in zip(os.listdir(test_dir), predictions):
        class_name = label_encoder.inverse_transform([predicted_class])[0]
        f.write(f"{filename} {class_name}\n")