In [1]:
import cv2
import numpy as np


In [2]:
from keras.datasets import cifar100

2024-12-09 14:59:04.579381: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Define the number of bins for the histogram
num_bins = 256

In [4]:
# Define HOG parameters
cell_size = (4, 4)           # Size of each cell in pixels
block_size = (2, 2)          # Number of cells in a block
block_stride = (4, 4)        # Step size for block movement
win_size = (32, 32)          # Same as image size
nbins = 9                    # Number of orientation bins

# Initialize HOG descriptor
hog = cv2.HOGDescriptor(
    _winSize=(win_size[1], win_size[0]),
    _blockSize=(block_size[1] * cell_size[1], block_size[0] * cell_size[0]),
    _blockStride=(block_stride[1], block_stride[0]),
    _cellSize=(cell_size[1], cell_size[0]),
    _nbins=nbins
)

# Function to compute HOG features for the dataset
def compute_hog_features(images):
    features = []
    for img in images:
        hog_feats = hog.compute(img).flatten()  # Flatten to 1D vector
        features.append(hog_feats)
    return np.array(features)



In [5]:
# Convert images to grayscale
x_train_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in x_train])
x_test_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in x_test])

# Extract HOG features for training and test datasets
x_train_hog = compute_hog_features(x_train_gray)
x_test_hog = compute_hog_features(x_test_gray)

print(f"Training HOG feature shape: {x_train_hog.shape}")
print(f"Test HOG feature shape: {x_test_hog.shape}")

Training HOG feature shape: (50000, 1764)
Test HOG feature shape: (10000, 1764)


In [6]:
# Function to extract color histogram features
def color_histogram(image):
    # Convert image to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

    # Calculate histograms for each color channel
    hist_h = cv2.calcHist([hsv_image], [0], None, [num_bins], [0, 180])  # Hue
    hist_s = cv2.calcHist([hsv_image], [1], None, [num_bins], [0, 256])  # Saturation
    hist_v = cv2.calcHist([hsv_image], [2], None, [num_bins], [0, 256])  # Value

    # Normalize histograms
    cv2.normalize(hist_h, hist_h, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    cv2.normalize(hist_s, hist_s, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    cv2.normalize(hist_v, hist_v, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)

    # Concatenate histograms into a single feature vector
    histogram = np.concatenate((hist_h, hist_s, hist_v), axis=0)

    return histogram

In [7]:
# Extract features from training and test data
train_features_color = []
for image in x_train:
    feature_c = color_histogram(image)
    train_features_color.append(feature_c)

test_features_color = []
for image in x_test:
    feature_c_test = color_histogram(image)
    test_features_color.append(feature_c_test)

# Convert feature lists to NumPy arrays
train_features_color = np.array(train_features_color)
test_features_color = np.array(test_features_color)

print("Training features shape:", train_features_color.shape)
print("Test features shape:", test_features_color.shape)

Training features shape: (50000, 768, 1)
Test features shape: (10000, 768, 1)


In [8]:
# Function to compute LBP histogram
def lbp_histogram(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply LBP operator
    lbp = cv2.calcHist([gray], [0], None, [256], [0, 256])
    lbp = cv2.normalize(lbp, lbp, 0, 255, cv2.NORM_MINMAX)

    # Flatten the histogram
    hist = lbp.flatten()

    return hist

In [9]:
# Extract features from training
train_features_lbp = []
for image in x_train:
    feature_lbp = lbp_histogram(image)
    train_features_lbp.append(feature_lbp)
    
# Extract features from test data
test_features_lbp = []
for image in x_test:
    feature_lbp = lbp_histogram(image)
    test_features_lbp.append(feature_lbp)
    
# Convert feature lists to NumPy arrays
train_features_lbp = np.array(train_features_lbp)
test_features_lbp = np.array(test_features_lbp)

print("Training features shape:", train_features_lbp.shape)
print("Test features shape:", test_features_lbp.shape)

Training features shape: (50000, 256)
Test features shape: (10000, 256)


In [10]:
# KMeans Classifier Implementation
class KMeansClassifier:
    def __init__(self, n_clusters, max_iter=300, tol=1e-4):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.centroids = None
        self.labels = None

    def fit(self, X):
        n_samples, n_features = X.shape
        random_indices = np.random.choice(n_samples, self.n_clusters, replace=False)
        self.centroids = X[random_indices]

        for i in range(self.max_iter):
            distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
            self.labels = np.argmin(distances, axis=1)

            new_centroids = np.array([X[self.labels == k].mean(axis=0) for k in range(self.n_clusters)])
            if np.all(np.abs(new_centroids - self.centroids) < self.tol):
                break
            self.centroids = new_centroids

    def predict(self, X):
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)

In [11]:
# Apply KMeans
n_clusters = 100  # Number of clusters
kmeans = KMeansClassifier(n_clusters=n_clusters)

In [12]:
from sklearn.decomposition import PCA
pca1 = PCA(n_components=1)
pca2 = PCA(n_components=1)
# Fit the PCA model to the data
pca1.fit(x_train_hog)

# Transform the data to the new lower-dimensional space
x_train_hog = pca1.transform(x_train_hog)

pca2.fit(x_test_hog)

# Transform the data to the new lower-dimensional space
x_test_hog = pca2.transform(x_test_hog)

print(f"Training HOG feature shape: {x_train_hog.shape}")
print(f"Test HOG feature shape: {x_test_hog.shape}")

Training HOG feature shape: (50000, 1)
Test HOG feature shape: (10000, 1)


In [13]:
print("Fitting KMeans on training data...")
kmeans.fit(x_train_hog)

Fitting KMeans on training data...


In [14]:
print("Predicting clusters for test data...")
predicted_labels = kmeans.predict(x_test_hog)

Predicting clusters for test data...


In [15]:
# Display some results
print("Cluster assignments for the first 10 test images:", predicted_labels[:10])

Cluster assignments for the first 10 test images: [80 88 86 87 77 68 62 48 80 87]


In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score

print (accuracy_score(y_test, predicted_labels))

0.008


In [17]:
pca3 = PCA(n_components=1)
pca4 = PCA(n_components=1)
# Fit the PCA model to the data
pca3.fit(train_features_lbp)

# Transform the data to the new lower-dimensional space
train_features_lbp = pca3.transform(train_features_lbp)

pca4.fit(test_features_lbp)

# Transform the data to the new lower-dimensional space
test_features_lbp = pca4.transform(test_features_lbp)

print(f"Training LBP feature shape: {train_features_lbp.shape}")
print(f"Test LBP feature shape: {test_features_lbp.shape}")

Training LBP feature shape: (50000, 1)
Test LBP feature shape: (10000, 1)


In [18]:
print("Fitting KMeans on training data...")
kmeans.fit(train_features_lbp)

print("Predicting clusters for test data...")
predicted_labels_lbp = kmeans.predict(test_features_lbp)

print (accuracy_score(y_test, predicted_labels_lbp))

Fitting KMeans on training data...
Predicting clusters for test data...
0.0104


In [21]:
# Reshape the training features
train_features_color = train_features_color.reshape(50000, -1)

# Reshape the test features
test_features_color = test_features_color.reshape(10000, -1)

pca5 = PCA(n_components=1)
pca6 = PCA(n_components=1)
# Fit the PCA model to the data
pca5.fit(train_features_color)

# Transform the data to the new lower-dimensional space
train_features_color = pca5.transform(train_features_color)

pca6.fit(test_features_color)

# Transform the data to the new lower-dimensional space
test_features_color = pca6.transform(test_features_color)

print(f"Training LBP feature shape: {train_features_color.shape}")
print(f"Test LBP feature shape: {test_features_color.shape}")

Training LBP feature shape: (50000, 1)
Test LBP feature shape: (10000, 1)


In [22]:
print("Fitting KMeans on training data...")
kmeans.fit(train_features_color)

print("Predicting clusters for test data...")
predicted_labels_color = kmeans.predict(test_features_color)

print (accuracy_score(y_test, predicted_labels_color))

Fitting KMeans on training data...
Predicting clusters for test data...
0.0097
