## Libraries and Imports

In [26]:
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from datasets import load_dataset
from sklearn.cluster import KMeans

## Import data

In [None]:
# Load the Caltech-101 dataset from the Hugging Face hub (cached after the first download).
ds_dict = load_dataset("flwrlabs/caltech101")

random.seed(47)  # fixed seed so the same classes are drawn on every run

ds = ds_dict["train"]
class_names = ds.features["label"].names

# Draw a random subset of label IDs to keep the experiment small.
num_of_labels = 5 # change as needed <--------------
num_classes = len(class_names)
selected_label_ids = random.sample(range(num_classes), num_of_labels)

# Keep only the examples whose label was selected. `input_columns` passes just
# the label value to the predicate instead of the whole example, so the filter
# does not have to touch the image column to decide.
ds = ds.filter(lambda label: label in selected_label_ids, input_columns=["label"])

# We map the ids to class names for printing so we know which labels were selected
selected_labels = [class_names[i] for i in selected_label_ids]
print("Selected labels:", selected_labels)


Filter: 100%|██████████| 8677/8677 [00:02<00:00, 3396.78 examples/s]

Selected labels: ['hawksbill', 'bonsai', 'laptop', 'panda', 'lobster']





## Split data

In [3]:
# Partition the filtered dataset into two equally sized halves; the fixed seed
# keeps the partition identical between runs.
ds_split = ds.train_test_split(seed=42, test_size=0.5)

# "test" holds the requested 50%; "train" receives everything that remains.
train_ds, test_ds = ds_split["train"], ds_split["test"]

## Extract features

In [24]:
sift = cv2.SIFT_create() # SIFT feature extractor; created once here and reused by extract_features for every image

def extract_features(pil_image):
    """Detect SIFT keypoints in an image and compute their descriptors.

    Parameters
    ----------
    pil_image : PIL image (or any object ``np.array`` can convert)
        The input image. Three-dimensional arrays are treated as RGB and
        converted to grayscale; two-dimensional arrays are used as they are.

    Returns
    -------
    keypoints : sequence
        The keypoints exactly as returned by ``sift.detectAndCompute``
        (empty when nothing was detected).
    descriptors : np.ndarray
        One row per keypoint. When no keypoint is found this is an empty
        ``(0, 128)`` float32 array, so the result can still be stacked with
        ``np.vstack`` alongside the descriptors of other images.
    """
    # Convert PIL image to numpy array
    image = np.array(pil_image)

    # Collapse colour images to a single grayscale channel before detection.
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    keypoints, descriptors = sift.detectAndCompute(image, None)

    # detectAndCompute yields None instead of an array when there are no
    # keypoints. Substitute an empty array but keep returning a 2-tuple:
    # callers unpack `keypoints, descriptors`, which fails on a bare array.
    if descriptors is None:
        descriptors = np.empty((0, 128), dtype=np.float32)

    return keypoints, descriptors

In [18]:
def _collect_descriptors(dataset):
    """Run ``extract_features`` on every sample of ``dataset``.

    Returns a pair ``(features, labels)`` of equally long lists: ``features[i]``
    is the descriptor array of the i-th image and ``labels[i]`` its label id.
    """
    features = []
    labels = []
    for sample in dataset:
        _, descriptors = extract_features(sample["image"])  # keypoints are not needed here
        features.append(descriptors)
        labels.append(sample["label"])
    return features, labels


# Same extraction for both splits, so it lives in one helper instead of two copied loops.
train_features, train_labels = _collect_descriptors(train_ds)
test_features, test_labels = _collect_descriptors(test_ds)

# Stack the per-image descriptor arrays into one matrix per split
# (one row per descriptor, pooled over all images of the split).
train_feature_matrix = np.vstack(train_features)
test_feature_matrix = np.vstack(test_features)

train_feature_matrix.shape, test_feature_matrix.shape #check shapes

((87407, 128), (82923, 128))

The shape above tells us that the training set currently yields 87407 features (rows), each represented as a vector of length 128. Each feature describes a patch in a given image in the dataset. A feature could, e.g., be a pattern, blob, corner, edge or something else.

In [29]:
# k-means is fitted on a random subset of the training descriptors rather than
# on all of them, which keeps the clustering input small.
num_samples = 1000  # Adjust as needed

# Total number of training descriptors (rows of the stacked matrix).
num_dec = len(train_feature_matrix)
# Sampling is without replacement, so never ask for more rows than exist.
sample_size = num_dec if num_dec < num_samples else num_samples

# Seeded generator -> the same subset is drawn on every run.
rng = np.random.default_rng(seed=42)
sample_indices = rng.choice(num_dec, sample_size, replace=False)

# Pick the sampled rows out of the training matrix.
train_sampled_descriptors = train_feature_matrix[sample_indices, :]
train_sampled_descriptors.shape #check shapes again

(1000, 128)

## K-means clustering

In [30]:
k = 500  # number of clusters / visual words
random_state = 42  # fixed seed so the clustering is reproducible

# KMeans is imported explicitly from sklearn.cluster: a bare `import sklearn`
# does not guarantee that the `cluster` submodule is loaded, so the attribute
# lookup `sklearn.cluster` can fail on a fresh kernel.
KMeans_model = KMeans(n_clusters=k, random_state=random_state, n_init=10) # Model definition
KMeans_model.fit(train_sampled_descriptors) # Fit model, which means to train the k-means clustering
visual_words = KMeans_model.cluster_centers_  # The cluster centers are our visual words

Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [32]:
# Sanity check of the fitted model: the visual-word matrix has one row per cluster center.
print("Visual words shape:", visual_words.shape)
# inertia_ is the k-means objective value reported by the fitted model (lower means tighter clusters).
print("Inertia:", KMeans_model.inertia_)

Visual words shape: (500, 128)
Inertia: 28151216.0


## Form Bag of words