In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from torchvision import datasets, transforms
from PIL import Image

In [2]:
# --- Configuration ---
VOCAB_SIZE = 500
MAX_PER_CLASS = 50

In [4]:
def load_caltech101(path='../data/101_ObjectCategories', max_per_class=MAX_PER_CLASS):
    images, labels = [], []
    class_names = sorted([d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)) and not d.startswith("BACKGROUND")])
    for label in class_names:
        img_dir = os.path.join(path, label)
        count = 0
        for file in os.listdir(img_dir):
            if count >= max_per_class:
                break
            img_path = os.path.join(img_dir, file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is not None:
                images.append(img)
                labels.append(label)
                count += 1
    return images, labels

def load_cifar10(limit=1000):
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.ToTensor()
    ])
    dataset = datasets.CIFAR10(root='../data', train=True, download=True, transform=transform)
    images, labels = [], []
    for i in range(min(limit, len(dataset))):
        img_tensor, label = dataset[i]
        img = transforms.ToPILImage()(img_tensor).convert('L')
        images.append(np.array(img))
        labels.append(dataset.classes[label])
    return images, labels

In [5]:
def extract_orb_descriptors(images):
    orb = cv2.ORB_create(nfeatures=1000)
    descriptor_list = []
    image_descriptors = []
    for img in tqdm(images, desc="Extracting ORB"):
        kp, des = orb.detectAndCompute(img, None)
        if des is not None:
            descriptor_list.extend(des.astype(np.float32))
            image_descriptors.append(des.astype(np.float32))
        else:
            image_descriptors.append(np.array([]))
    return descriptor_list, image_descriptors

def build_vocabulary(descriptor_list, k=VOCAB_SIZE):
    all_descriptors = np.vstack(descriptor_list)
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(all_descriptors)
    return kmeans

def compute_bovw(image_descriptors, kmeans):
    k = kmeans.n_clusters
    features = []
    for des in image_descriptors:
        hist = np.zeros(k)
        if des is not None and len(des) > 0:
            words = kmeans.predict(des)
            for w in words:
                hist[w] += 1
        features.append(hist)
    return np.array(features)

In [6]:
def train_orb_pipeline(dataset_name, k=500):
    print(f"\nRunning ORB pipeline for: {dataset_name}")
    if dataset_name == 'caltech101':
        images, labels = load_caltech101()
    elif dataset_name == 'cifar10':
        images, labels = load_cifar10()
    else:
        raise ValueError("Unsupported dataset")

    le = LabelEncoder()
    labels = le.fit_transform(labels)

    descriptor_list, image_descriptors = extract_orb_descriptors(images)
    kmeans = build_vocabulary(descriptor_list, k=k)
    bovw = compute_bovw(image_descriptors, kmeans)

    scaler = StandardScaler()
    X = scaler.fit_transform(bovw)
    y = labels

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
        "SVC": (SVC(), {
            'C': [1, 5],
            'gamma': ['scale', 'auto'],
            'kernel': ['rbf']
        }),
        "RandomForest": (RandomForestClassifier(random_state=42), {
            'n_estimators': [100, 200],
            'max_depth': [None, 10]
        }),
        "XGBoost": (XGBClassifier(eval_metric='mlogloss', verbosity=0, random_state=42), {
            'n_estimators': [100, 200],
            'max_depth': [3, 6],
            'learning_rate': [0.1, 0.01]
        })
    }

    for name, (model, param_grid) in models.items():
        print(f"\n🔍 Tuning {name} with GridSearchCV...")
        grid = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', verbose=0)
        grid.fit(X_train, y_train)
        best_model = grid.best_estimator_
        preds = best_model.predict(X_test)
        acc = accuracy_score(y_test, preds)
        print(f"{name} Accuracy: {acc * 100:.2f}%")
        print(f"Best Params: {grid.best_params_}")

In [7]:
train_orb_pipeline('caltech101', k=100)


Running ORB pipeline for: caltech101


Extracting ORB: 100%|██████████| 4722/4722 [00:07<00:00, 656.16it/s]



🔍 Tuning SVC with GridSearchCV...
SVC Accuracy: 22.01%
Best Params: {'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}

🔍 Tuning RandomForest with GridSearchCV...
RandomForest Accuracy: 16.51%
Best Params: {'max_depth': None, 'n_estimators': 200}

🔍 Tuning XGBoost with GridSearchCV...
XGBoost Accuracy: 17.14%
Best Params: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200}


In [8]:
train_orb_pipeline('caltech101', k=300)


Running ORB pipeline for: caltech101


Extracting ORB: 100%|██████████| 4722/4722 [00:07<00:00, 610.91it/s]



🔍 Tuning SVC with GridSearchCV...
SVC Accuracy: 23.07%
Best Params: {'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}

🔍 Tuning RandomForest with GridSearchCV...
RandomForest Accuracy: 15.24%
Best Params: {'max_depth': None, 'n_estimators': 200}

🔍 Tuning XGBoost with GridSearchCV...
XGBoost Accuracy: 16.51%
Best Params: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}


In [None]:
train_orb_pipeline('cifar10', k=100)


Running ORB pipeline for: cifar10


Extracting ORB: 100%|██████████| 1000/1000 [00:00<00:00, 21588.08it/s]


ValueError: need at least one array to concatenate