In [2]:
import cv2
import numpy as np
from tensorflow.keras.datasets import cifar100

# Load CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()


In [3]:
# Initialize HOG descriptor
cell_size = (4, 4)           # Size of each cell in pixels
block_size = (2, 2)          # Number of cells in a block
block_stride = (4, 4)        # Step size for block movement
win_size = (32, 32)          # Same as image size
nbins = 9                    # Number of orientation bins

hog= cv2.HOGDescriptor(
     _winSize=(win_size[1], win_size[0]),
     _blockSize=(block_size[1] * cell_size[1], block_size[0] * cell_size[0]),
     _blockStride=(block_stride[1], block_stride[0]),
     _cellSize=(cell_size[1], cell_size[0]),
     _nbins=nbins
)

# Compute HOG features for the dataset
def compute_hog_features(images, hog):
    features = []
    for img in images:
        hog_feats = hog.compute(img).flatten()  # Flatten to 1D vector
        features.append(hog_feats)
    return np.array(features)

In [4]:
# Convert images to grayscale

x_train_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in x_train])
x_test_gray = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in x_test])

# Extract HOG features for training and test datasets
x_train_hog = compute_hog_features(x_train_gray,hog)
x_test_hog = compute_hog_features(x_test_gray,hog)

print(f"Training HOG feature shape: {x_train_hog.shape}")
print(f"Test HOG feature shape: {x_test_hog.shape}")

Training HOG feature shape: (50000, 1764)
Test HOG feature shape: (10000, 1764)


In [5]:
from sklearn.decomposition import PCA

pca = PCA(n_components=100)
pca2 = PCA(n_components=100)
X_reduced = pca.fit_transform(x_train_hog)
X_reduced_test = pca2.fit_transform(x_test_hog)
print(X_reduced.shape,X_reduced_test.shape)

(50000, 100) (10000, 100)


In [6]:
# KMeans Classifier Implementation

class KMeansClassifier:
    def __init__(self, n_clusters, max_iter=300, tol=1e-1):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.centroids = None
        self.labels = None

    def fit(self, X):
        n_samples, n_features = X.shape
        random_indices = np.random.choice(n_samples, self.n_clusters, replace=False)
        self.centroids = X[random_indices]

        for i in range(self.max_iter):
            distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
            self.labels = np.argmin(distances, axis=1)

            new_centroids = np.array([X[self.labels == k].mean(axis=0) for k in range(self.n_clusters)])
            if np.all(np.abs(new_centroids - self.centroids) < self.tol):
                break
            self.centroids = new_centroids

    def predict(self, X):
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)


In [7]:
n_clusters = 50
kmeans = KMeansClassifier(n_clusters=n_clusters)

print("Fitting KMeans on training data...")
kmeans.fit(X_reduced)

print("Predicting clusters for test data...")
predicted_labels = kmeans.predict(X_reduced_test)


Fitting KMeans on training data...
Predicting clusters for test data...


In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score
print("the accuracy of HOG feature ",accuracy_score(y_test, predicted_labels))
confusion_matrix = confusion_matrix(y_test, predicted_labels)
print(confusion_matrix)

the accuracy of HOG feature  0.01
[[ 0  0  0 ...  0  0  0]
 [ 3  1  2 ...  0  0  0]
 [ 0  1 11 ...  0  0  0]
 ...
 [ 1  1  5 ...  0  0  0]
 [ 2  2 11 ...  0  0  0]
 [ 1  0  3 ...  0  0  0]]


In [9]:
# Function to extract color histogram features
def color_histogram(image):
    # Convert image to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    num_bins=256
    # Calculate histograms for each color channel
    hist_h = cv2.calcHist([hsv_image], [0], None, [num_bins], [0, 180])  # Hue
    hist_s = cv2.calcHist([hsv_image], [1], None, [num_bins], [0, 256])  # Saturation
    hist_v = cv2.calcHist([hsv_image], [2], None, [num_bins], [0, 256])  # Value

    # Normalize histograms
    cv2.normalize(hist_h, hist_h, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    cv2.normalize(hist_s, hist_s, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    cv2.normalize(hist_v, hist_v, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)

    # Concatenate histograms into a single feature vector
    histogram = np.concatenate((hist_h, hist_s, hist_v), axis=0)

    return histogram.ravel()

# Extract features from training and test data
train_features_color = []
for image in x_train:
    feature_c = color_histogram(image)
    train_features_color.append(feature_c)

test_features_color = []
for image in x_test:
    feature_c_test = color_histogram(image)
    test_features_color.append(feature_c_test)

# Convert feature lists to NumPy arrays
train_features_color = np.array(train_features_color)
test_features_color = np.array(test_features_color)

print("Training features shape:", train_features_color.shape)
print("Test features shape:", test_features_color.shape)

Training features shape: (50000, 768)
Test features shape: (10000, 768)


In [10]:
from sklearn.decomposition import PCA
pca3 = PCA(n_components=100)  # Choose an appropriate number of components
pca4 = PCA(n_components=100)
X_reduced = pca3.fit_transform(train_features_color)
X_reduced_test = pca4.fit_transform(test_features_color)
print(X_reduced.shape,X_reduced_test.shape)

(50000, 100) (10000, 100)


In [11]:
# Apply KMeans
n_clusters = 50  # Number of clusters
kmeans = KMeansClassifier(n_clusters=n_clusters)

print("Fitting KMeans on training data...")
kmeans.fit(X_reduced)

print("Predicting clusters for test data...")
predicted_labels_color = kmeans.predict(X_reduced_test)

Fitting KMeans on training data...
Predicting clusters for test data...


In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score
print("the accuracy of Color feature ",accuracy_score(y_test, predicted_labels_color))
confusion_matrix = confusion_matrix(y_test, predicted_labels_color)
print(confusion_matrix)

the accuracy of Color feature  0.0078
[[0 0 2 ... 0 0 0]
 [3 0 3 ... 0 0 0]
 [0 2 2 ... 0 0 0]
 ...
 [5 5 0 ... 0 0 0]
 [4 0 4 ... 0 0 0]
 [2 1 1 ... 0 0 0]]


In [13]:
# Function to compute LBP histogram
def lbp_histogram(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply LBP operator
    lbp = cv2.calcHist([gray], [0], None, [256], [0, 256])
    lbp = cv2.normalize(lbp, lbp, 0, 255, cv2.NORM_MINMAX)

    # Flatten the histogram
    hist = lbp.flatten()

    return hist

# Extract features from training
train_features_lbp = []
for image in x_train:
    feature_lbp = lbp_histogram(image)
    train_features_lbp.append(feature_lbp)

# Extract features from test data
test_features_lbp = []
for image in x_test:
    feature_lbp = lbp_histogram(image)
    test_features_lbp.append(feature_lbp)

# Convert feature lists to NumPy arrays
train_features_lbp = np.array(train_features_lbp)
test_features_lbp = np.array(test_features_lbp)

print("Training features shape:", train_features_lbp.shape)
print("Test features shape:", test_features_lbp.shape)

Training features shape: (50000, 256)
Test features shape: (10000, 256)


In [14]:
pca5 = PCA(n_components=100)  # Choose an appropriate number of components
pca6 = PCA(n_components=100)
X_reduced = pca5.fit_transform(train_features_lbp)
X_reduced_test = pca6.fit_transform(test_features_lbp)
print(X_reduced.shape,X_reduced_test.shape)

(50000, 100) (10000, 100)


In [15]:
# Apply KMeans
n_clusters = 50  # Number of clusters
kmeans = KMeansClassifier(n_clusters=n_clusters)

print("Fitting KMeans on training data...")
kmeans.fit(train_features_lbp)

print("Predicting clusters for test data...")
predicted_labels_lbp = kmeans.predict(test_features_lbp)

Fitting KMeans on training data...
Predicting clusters for test data...


In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score
print("the accuracy of Color feature ",accuracy_score(y_test, predicted_labels_lbp))
confusion_matrix = confusion_matrix(y_test, predicted_labels_lbp)
print(confusion_matrix)

the accuracy of Color feature  0.0108
[[0 0 0 ... 0 0 0]
 [4 1 0 ... 0 0 0]
 [4 0 0 ... 0 0 0]
 ...
 [0 1 1 ... 0 0 0]
 [2 1 1 ... 0 0 0]
 [0 4 3 ... 0 0 0]]
