#PK_W281_Classification_SIFT_Cifar100.ipynb

W281 Final Project

April 17, 2023

Waqas Ali | Pedro Melendez | Prakash Krishnan

This notebook has image classification ML Models built on **Scale Invariant Feature Transform (SIFT)** features extracted from CIFAR-100 Dataset.

The following Classifiers are evaluated in this notebook.
1. **Logistice Regression Models**
2. **Random Forest Classifier**
3. **Support Vector Machines (SVM)**

Dataset Summary

This dataset has 100 classes containing 600 images each. There are 500 training images and 100 testing images per class. The 100 classes in the CIFAR-100 are grouped into 20 superclasses. Each image comes with a "fine" label (the class to which it belongs) and a "coarse" label (the superclass to which it belongs). 

Data Fields

    image: 32 by 32 RGB image 
    fine label: an integer code specifying the fine label class
    coarse label: mapping of a fine label to a coarse or super-class

Fine Label to Coarse Label Mapping

    mapping = {
    'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
    'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
    'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
    'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
    'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
    'household electrical device': ['clock', 'computer_keyboard', 'lamp', 'telephone', 'television'],
    'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
    'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
    'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
    'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
    'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
    'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
    'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
    'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
    'people': ['baby', 'boy', 'girl', 'man', 'woman'],
    'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
    'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
    'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
    'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
    'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
    }

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import time
import cv2

import glob
import os

import tensorflow as tf
tf.random.set_seed(42)
print(tf.__version__)

import matplotlib.pyplot as plt
import seaborn as sns
import textwrap

from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization

%matplotlib inline

from skimage.feature import hog
from sklearn.metrics import accuracy_score

from skimage.filters import sobel
from skimage.feature import greycomatrix, greycoprops
from skimage.measure import shannon_entropy

2.11.0


# Setup

In [None]:
SIZE = 32

In [None]:
# Use TPUs if available
try: # detect TPUs
    tpu = None
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # detect GPUs
    strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines

print("Number of accelerators: ", strategy.num_replicas_in_sync)



Number of accelerators:  1


#Load CIFAR 100 Dataset from Keras

In [None]:
# Load CIFAR dataset from Keras

cifar100 = tf.keras.datasets.cifar100
(X_train,Y_train) , (X_test,Y_test) = cifar100.load_data()

In [None]:
# Categories labels (fine and coarse)

fine_label = [ 'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'computer_keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf', 'woman', 'worm',
]

mapping = {
'aquatic mammals': ['beaver', 'dolphin', 'otter', 'seal', 'whale'],
'fish': ['aquarium_fish', 'flatfish', 'ray', 'shark', 'trout'],
'flowers': ['orchid', 'poppy', 'rose', 'sunflower', 'tulip'],
'food containers': ['bottle', 'bowl', 'can', 'cup', 'plate'],
'fruit and vegetables': ['apple', 'mushroom', 'orange', 'pear', 'sweet_pepper'],
'household electrical device': ['clock', 'computer_keyboard', 'lamp', 'telephone', 'television'],
'household furniture': ['bed', 'chair', 'couch', 'table', 'wardrobe'],
'insects': ['bee', 'beetle', 'butterfly', 'caterpillar', 'cockroach'],
'large carnivores': ['bear', 'leopard', 'lion', 'tiger', 'wolf'],
'large man-made outdoor things': ['bridge', 'castle', 'house', 'road', 'skyscraper'],
'large natural outdoor scenes': ['cloud', 'forest', 'mountain', 'plain', 'sea'],
'large omnivores and herbivores': ['camel', 'cattle', 'chimpanzee', 'elephant', 'kangaroo'],
'medium-sized mammals': ['fox', 'porcupine', 'possum', 'raccoon', 'skunk'],
'non-insect invertebrates': ['crab', 'lobster', 'snail', 'spider', 'worm'],
'people': ['baby', 'boy', 'girl', 'man', 'woman'],
'reptiles': ['crocodile', 'dinosaur', 'lizard', 'snake', 'turtle'],
'small mammals': ['hamster', 'mouse', 'rabbit', 'shrew', 'squirrel'],
'trees': ['maple_tree', 'oak_tree', 'palm_tree', 'pine_tree', 'willow_tree'],
'vehicles 1': ['bicycle', 'bus', 'motorcycle', 'pickup_truck', 'train'],
'vehicles 2': ['lawn_mower', 'rocket', 'streetcar', 'tank', 'tractor'],
}
coarse_label = list(mapping.keys())

fine_id_coarse_id =  {0: 4,  1: 1,  2: 14,  3: 8,  4: 0,  5: 6,  6: 7,  7: 7,  8: 18,  9: 3,  
                      10: 3,  11: 14,  12: 9,  13: 18,  14: 7,  15: 11,  16: 3,  17: 9,  18: 7,  19: 11,  
                      20: 6,  21: 11,  22: 5,  23: 10,  24: 7,  25: 6,  26: 13,  27: 15,  28: 3,  29: 15,  
                      30: 0,  31: 11,  32: 1,  33: 10,  34: 12,  35: 14,  36: 16,  37: 9,  38: 11,  39: 5,  
                      40: 5,  41: 19,  42: 8,  43: 8,  44: 15,  45: 13,  46: 14,  47: 17,  48: 18,  49: 10,  
                      50: 16,  51: 4,  52: 17,  53: 4,  54: 2,  55: 0,  56: 17,  57: 4,  58: 18,  59: 17,  
                      60: 10,  61: 3,  62: 2,  63: 12,  64: 12,  65: 16,  66: 12,  67: 1,  68: 9,  69: 19,  
                      70: 2,  71: 10,  72: 0,  73: 1,  74: 16,  75: 12,  76: 9,  77: 13,  78: 15,  79: 13,  
                      80: 16,  81: 19,  82: 2,  83: 4,  84: 6,  85: 19,  86: 5,  87: 5,  88: 8,  89: 19,  
                      90: 18,  91: 1,  92: 2,  93: 15,  94: 6,  95: 0,  96: 17,  97: 8,  98: 14,  99: 13}



wrapper = textwrap.TextWrapper(width=15, break_long_words=True)

def fine_to_named_coarse_label(fine_label_id):
  label = wrapper.wrap(coarse_label[fine_id_coarse_id[fine_label_id]])
  return '\n'.join(label)

def get_named_coarse_label(coarse_label_id):
  label = wrapper.wrap(coarse_label[coarse_label_id])
  return '\n'.join(label)

def get_named_coarse_label_no_wrapper(coarse_label_id):
  label = (coarse_label[coarse_label_id])
  return label

# Create Training and Test Labels Based on Coarse Categories

In [None]:
# Data wrangling --> Creating the coarse train arrays since we will train the model based on the 20 coarse categories 
coarse_Y_train = np.asarray([fine_id_coarse_id[x[0]] for x in Y_train])

coarse_Y_test= np.asarray([fine_id_coarse_id[x[0]] for x in Y_test])


In [None]:
# Inspect Train Data

display(X_train.shape)
display(coarse_Y_train.shape)
display(X_train[0])
display(coarse_Y_train)

(50000, 32, 32, 3)

(50000,)

array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [195, 205, 193],
        [212, 224, 204],
        [182, 194, 167]],

       [[255, 255, 255],
        [254, 254, 254],
        [254, 254, 254],
        ...,
        [170, 176, 150],
        [161, 168, 130],
        [146, 154, 113]],

       [[255, 255, 255],
        [254, 254, 254],
        [255, 255, 255],
        ...,
        [189, 199, 169],
        [166, 178, 130],
        [121, 133,  87]],

       ...,

       [[148, 185,  79],
        [142, 182,  57],
        [140, 179,  60],
        ...,
        [ 30,  17,   1],
        [ 65,  62,  15],
        [ 76,  77,  20]],

       [[122, 157,  66],
        [120, 155,  58],
        [126, 160,  71],
        ...,
        [ 22,  16,   3],
        [ 97, 112,  56],
        [141, 161,  87]],

       [[ 87, 122,  41],
        [ 88, 122,  39],
        [101, 134,  56],
        ...,
        [ 34,  36,  10],
        [105, 133,  59],
        [138, 173,  79]]

array([11, 15,  4, ...,  8,  7,  1])

In [None]:
# Inspect Test Data

display(X_test.shape)
display(coarse_Y_test.shape)
display(X_test[0])
display(coarse_Y_test)

(10000, 32, 32, 3)

(10000,)

array([[[199, 215, 249],
        [196, 211, 244],
        [195, 210, 243],
        ...,
        [216, 231, 250],
        [217, 231, 250],
        [224, 234, 252]],

       [[197, 210, 239],
        [195, 208, 238],
        [195, 210, 240],
        ...,
        [231, 243, 250],
        [233, 243, 250],
        [241, 245, 253]],

       [[222, 226, 246],
        [213, 220, 242],
        [209, 219, 243],
        ...,
        [243, 250, 251],
        [244, 249, 251],
        [250, 250, 253]],

       ...,

       [[ 72,  73,  99],
        [ 71,  74, 102],
        [ 74,  78, 108],
        ...,
        [220, 208, 217],
        [183, 168, 181],
        [155, 141, 150]],

       [[ 72,  75, 104],
        [ 76,  81, 111],
        [ 84,  89, 122],
        ...,
        [222, 212, 220],
        [187, 174, 192],
        [145, 132, 149]],

       [[ 80,  85, 118],
        [ 84,  90, 123],
        [ 85,  92, 127],
        ...,
        [217, 207, 215],
        [207, 194, 211],
        [176, 164, 183]]

array([10, 10,  0, ...,  4,  8,  2])

# Pre-Process Images (Convert to Gray Scale for Feature Extraction)


In [None]:
# Convert Color Images to Gray Scale
# Convert int to float

X_train_gray_cv2 = [cv2.cvtColor (image, cv2.COLOR_BGR2GRAY) for image in X_train]
X_train_gray_cv2_np = np.array(X_train_gray_cv2)  
display("Gray Scale Train Images")
display(X_train_gray_cv2_np.shape)
display(X_train_gray_cv2_np[0])
print("")

X_test_gray_cv2 = [cv2.cvtColor (image, cv2.COLOR_BGR2GRAY) for image in X_test]
X_test_gray_cv2_np = np.array(X_test_gray_cv2)  
display("Gray Scale Test Images")
display(X_test_gray_cv2_np.shape)
display(X_test_gray_cv2_np[0])



'Gray Scale Train Images'

(50000, 32, 32)

array([[255, 255, 255, ..., 200, 217, 185],
       [255, 254, 254, ..., 168, 156, 141],
       [255, 254, 255, ..., 189, 162, 118],
       ...,
       [149, 140, 139, ...,  14,  48,  60],
       [126, 122, 130, ...,  13,  94, 137],
       [ 94,  93, 107, ...,  28, 108, 141]], dtype=uint8)




'Gray Scale Test Images'

(10000, 32, 32)

array([[223, 219, 218, ..., 235, 235, 238],
       [217, 215, 217, ..., 244, 244, 247],
       [232, 226, 225, ..., 250, 249, 251],
       ...,
       [ 81,  82,  87, ..., 212, 174, 145],
       [ 83,  89,  98, ..., 216, 181, 139],
       [ 94,  99, 102, ..., 211, 201, 171]], dtype=uint8)

In [None]:
X_train_gray_cv2_np.shape

(50000, 32, 32)

In [None]:
# Reduce Dataset Size for Prototyping

num_rows = 1000

coarse_label_id_list = [11,5,9,6,18]

# X_train_gray_cv2_np = X_train_gray_cv2_np[0:num_rows]
# coarse_Y_train = coarse_Y_train[0:num_rows]
# Y_train = Y_train[0:num_rows]

# display("Train")
# display(X_train_gray_cv2_np.shape)
# display(coarse_Y_train.shape)
# print("")

# X_test_gray_cv2_np = X_test_gray_cv2_np[0:num_rows]
# coarse_Y_test = coarse_Y_test[0:num_rows]
# Y_test = Y_test[0:num_rows]

# display("Test")
# display(X_test_gray_cv2_np.shape)
# display(coarse_Y_test.shape)



# SIFT Feature Extraction

In [None]:
def feature_extractor(x_dataset, y_dataset):
    image_feature_list = []
    index_list = []
    for index, img in enumerate(x_dataset):  #iterate through each image
        sift = cv2.SIFT_create(nfeatures=5)
        kp = sift.detect(img,None)
        if len(kp) <= 4:
          index_list.append(index)

    display("before", x_dataset.shape, y_dataset.shape)
    x_dataset = np.delete(x_dataset, index_list, axis=0)
    y_dataset = np.delete(y_dataset, index_list, axis=0)
    display("after", x_dataset.shape, y_dataset.shape)

    for index, img in enumerate(x_dataset):  #iterate through each image
        sift = cv2.SIFT_create(nfeatures=5)
        kp = sift.detect(img,None)
        kp, des = sift.compute(img,kp)
        # print("des",(des.shape))
        des = des.flatten()
        image_feature_list.append(des)
    
    x_feature_df = pd.DataFrame(image_feature_list)  

    return x_feature_df, y_dataset

# Build Out Features for Train and Test Data For Full Dataset

In [None]:
# Build Out Features for Train and Test

#Extract features from training images
x_train_sift_np, y_train_revise = feature_extractor(X_train_gray_cv2_np, coarse_Y_train)
x_train_sift_np = x_train_sift_np.to_numpy()
x_train_sift_np = x_train_sift_np[:,0:640]
display(x_train_sift_np.shape, y_train_revise.shape)
 
#Extract features from test images 
x_test_sift_np, y_test_revise = feature_extractor(X_test_gray_cv2_np, coarse_Y_test)
x_test_sift_np = x_test_sift_np.to_numpy() 
x_test_sift_np = x_test_sift_np[:,0:640]
display(x_test_sift_np.shape, y_test_revise.shape)

'before'

(50000, 32, 32)

(50000,)

'after'

(46392, 32, 32)

(46392,)

(46392, 640)

(46392,)

'before'

(10000, 32, 32)

(10000,)

'after'

(9289, 32, 32)

(9289,)

(9289, 640)

(9289,)

# Classification Model: Logistic Regression

In [None]:
# Logistic Regression Model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

lr_model = LogisticRegression(C=100, random_state=1, solver='lbfgs', multi_class='ovr')
lr_model.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = lr_model.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1338141888254925


# Classification Model: Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
forest_model = RandomForestClassifier(n_estimators = 500, max_depth = 6, bootstrap = True, random_state = 18)

forest_model.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = forest_model.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

0.12186457099795457


# Classification Model: Support Vector Machine (SVM)

In [None]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = clf.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

0.16740230380019377


# Build Out Features for Train and Test Data For Five Classes Only

In [None]:
# Build Out Features for Train and Test for 5 Classes Only

coarse_label_id_list = [11,5,9,6,18]

# Training Set
X_train_five_classes_gray = X_train_gray_cv2_np[(coarse_Y_train==11) | (coarse_Y_train==5) | (coarse_Y_train==9) | (coarse_Y_train==6) | (coarse_Y_train==18)]
coarse_Y_train_five_classes = coarse_Y_train[(coarse_Y_train==11) | (coarse_Y_train==5) | (coarse_Y_train==9) | (coarse_Y_train==6) | (coarse_Y_train==18)]
display(X_train_five_classes_gray.shape, coarse_Y_train_five_classes.shape)

# Extract features from training images
x_train_sift_np, y_train_revise = feature_extractor(X_train_five_classes_gray, coarse_Y_train_five_classes )
x_train_sift_np = x_train_sift_np.to_numpy()
x_train_sift_np = x_train_sift_np[:,0:640]
display(x_train_sift_np.shape, y_train_revise.shape)

# Test Set
X_test_five_classes_gray = X_test_gray_cv2_np[(coarse_Y_test==11) | (coarse_Y_test==5) | (coarse_Y_test==9) | (coarse_Y_test==6) | (coarse_Y_test==18)]
coarse_Y_test_five_classes = coarse_Y_test[(coarse_Y_test==11) | (coarse_Y_test==5) | (coarse_Y_test==9) | (coarse_Y_test==6) | (coarse_Y_test==18)]
display(X_train_five_classes_gray.shape, coarse_Y_train_five_classes.shape)

# #Extract features from test images
x_test_sift_np, y_test_revise = feature_extractor(X_test_five_classes_gray, coarse_Y_test_five_classes)
x_test_sift_np = x_test_sift_np.to_numpy() 
x_test_sift_np = x_test_sift_np[:,0:640]
display(x_test_sift_np.shape, y_test_revise.shape)

(12500, 32, 32)

(12500,)

'before'

(12500, 32, 32)

(12500,)

'after'

(11779, 32, 32)

(11779,)

(11779, 640)

(11779,)

(12500, 32, 32)

(12500,)

'before'

(2500, 32, 32)

(2500,)

'after'

(2346, 32, 32)

(2346,)

(2346, 640)

(2346,)

# Classification Model: Logistic Regression

In [None]:
# Logistic Regression Model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

lr_model = LogisticRegression(C=100, random_state=1, solver='lbfgs', multi_class='ovr')
lr_model.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = lr_model.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.3218243819266837


# Classification Model: Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
forest_model = RandomForestClassifier(n_estimators = 500, max_depth = 6, bootstrap = True, random_state = 18)

forest_model.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = forest_model.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

0.3422847399829497


# Classification Model: Support Vector Machine (SVM)

In [None]:
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(x_train_sift_np , y_train_revise.ravel())

# Evaluate the model
prdct = clf.predict(x_test_sift_np)
print(accuracy_score(y_test_revise , prdct))

0.37084398976982097
