# Imports

In [1]:
# imports

import os
import numpy as np
import random
import pandas as pd
import json
import pickle
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from utils import *

# Important NOTE: Use opencv >=4.4 
import cv2

In [2]:
# Global tunable knobs

# Debug prints
debug = True

# Number of K-Means clusters needed for BoVW 
NUM_KMEANS_CLUSTER = 20

# Import and split data

In [3]:
# define file directory
directory = '../data/interim/PatternNet/PatternNet/images'

# create a list of classes considered for this project
classes = ['beach', 'chaparral', 'dense_residential', 'forest', 'freeway', 'harbor', 'overpass', 'parking_space', 'river', 'swimming_pool']

# define the train, val, and test sets
train_files, val_files, test_files = generate_splits(classes, directory)

train/validation/test subsets were loaded from a pre-generated file
	Number of train files: 4799
	Number of val files: 1599
	Number of test files: 1601


# Extraction function

In [4]:
def extract_key_points(img):

    # Converting image to grayscale
    gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

    # Applying SIFT detector
    sift = cv2.SIFT_create(nfeatures=0, nOctaveLayers=3, edgeThreshold=0.2, contrastThreshold=0.07)

    #kp = sift.detect(gray, None)
    kp, des = sift.detectAndCompute(gray,None)

    # Marking the keypoint on the image using circles
    sift_img=cv2.drawKeypoints(gray, kp, img,
                          flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

    return sift_img, des


In [5]:
# define a function to show a grid of SIFT image in a directory (given a file subset)
def visualize_sift(files, directory, images_per_class=3):
    # create a dictionary to store class images
    class_images = {}
    
    # iterate over each file
    for class_name, file_name in files:
        # load the image
        img = plt.imread(os.path.join(directory, class_name, file_name))
        # if class not in dictionary, initialize empty list
        if class_name not in class_images:
            class_images[class_name] = []
        # append image to class list
        class_images[class_name].append(img)

    # create a grid of images
    num_classes = len(class_images)
    fig, axes = plt.subplots(num_classes, images_per_class + 1, figsize=(12, 3*num_classes))
    for i, (class_name, images) in enumerate(class_images.items()):
        # display class name in the first column
        axes[i, 0].text(0.5, 0.5, class_name, fontsize=16, ha='center', va='center')
        axes[i, 0].axis('off')

        # display random images in the subsequent columns
        random.shuffle(images)
        for j in range(images_per_class):
            sift_image = extract_key_points(images[j])
            axes[i, j+1].imshow(sift_image)

    plt.tight_layout()
    plt.show()

In [6]:
# Function to get keypoint descriptors for a subset of images (per class) in a directory 
def generate_sift_vectors (files, directory, images_per_class=3):
    # create a dictionary to store class images
    class_images = {}
    
    # create an empty array to store labels and keypoint descriptors of all images
    desc_list = []
    label_list = []

    # iterate over each file
    for class_name, file_name in files:
        # load the image
        img = plt.imread(os.path.join(directory, class_name, file_name))
        # if class not in dictionary, initialize empty list
        if class_name not in class_images:
            class_images[class_name] = []
        # append image to class list
        class_images[class_name].append(img)

    # For each subset of images in a class, extract keypoints
    num_classes = len(class_images)
    for i, (class_name, images) in enumerate(class_images.items()):
        random.shuffle(images)
        for j in range(images_per_class):
            sift_image, desc = extract_key_points(images[j])
            desc_list.append(desc)
            label_list.append(class_name)

    return desc_list, label_list

Extract SIFT keypoints from all training images

In [7]:
# generate the set of feature vectors for all images in each class of the training set
train_sift_desc_list, train_label_list = generate_sift_vectors(train_files, directory, images_per_class=400)


# sift_desc_list is a 2D array per image: (Number of keypoints X 128).
# the Number of keypoints per image can vary from 0 to several thousands.
# label_list is a 1D array containing the class label for all those images.

if debug:
    print(f'Number of train images: Labels: {len(train_label_list)} KP_Descriptors: {len(train_sift_desc_list)}')

# Stack all the kp descriptors vertically so that we get one giant 2D array
# Number of keypoints across all images X 128
vStack = np.array(train_sift_desc_list[0])
for remaining in train_sift_desc_list[1:]:
    if remaining is not None:
        vStack = np.vstack((vStack, remaining))

if debug:
    print(vStack.shape)

Number of train images: Labels: 4000 KP_Descriptors: 4000
(1419665, 128)


## Create the bag of Visual words
Feed all KP descriptors into KMeans algorithm attempting to create a max of NUM_KMEANS_CLUSTERS based on the keypoints extracted from all images

In [8]:
kmeans = KMeans(init="k-means++", n_clusters=NUM_KMEANS_CLUSTER, n_init=4)
kmeans_fit = kmeans.fit_predict(vStack)

if debug:
    print(f'Number of KPs: {kmeans_fit.shape}, MinCluster: {kmeans_fit.min()}, MaxCluster: {kmeans_fit.max()}')

# Create a histogram for each image - each kp for the image belongs to one bucket (visual word)
# Keep a count of such visual words per image.
num_images = len(train_sift_desc_list)
histogram = np.array([np.zeros(NUM_KMEANS_CLUSTER) for i in range(num_images)])
old_count = 0
for img_num in range(num_images):
    if train_sift_desc_list[img_num] is None:
        # Some images have zero keypoints. Skip
        continue
    num_kp_in_image = len(train_sift_desc_list[img_num])
    for j in range(num_kp_in_image):
        idx = kmeans_fit[old_count+j]
        histogram[img_num][idx] += 1
    old_count += num_kp_in_image
        

Number of KPs: (1419665,), MinCluster: 0, MaxCluster: 19


## Support vector for supervised training
Since the histogram for each image carries a (weighted) combination of visual words, feed this data into SVM with the labels from the training set.  

In [10]:
X_train = histogram
y_train = train_label_list

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
svm_clf = SVC(random_state=42)
svm_clf.fit(X_train_scaled, y_train)

#svc = SVC().fit(histogram, label_list)

## Prediction
Extract the SIFT keypoints and create the BoVW histogram.
Feed this histogram into the classifier to predict the class.

In [11]:
# generate the set of feature vectors for all images in each class of the training set
val_sift_desc_list, val_label_list = generate_sift_vectors(val_files, directory, images_per_class=40)
if debug:
    print(f'Number of val images: Labels: {len(val_label_list)} KP_Descriptors: {len(val_sift_desc_list)}')
    
num_images = len(val_sift_desc_list)
words = np.array([np.zeros(NUM_KMEANS_CLUSTER) for i in range(num_images)])
old_count = 0
for i in range(num_images):
    if val_sift_desc_list[i] is None:
        continue
    l = len(val_sift_desc_list[i])
    ret = kmeans.predict(val_sift_desc_list[i])
    for j in ret:
        words[i][j] += 1

Number of val images: Labels: 400 KP_Descriptors: 400


In [12]:
#scalar = StandardScaler().fit(vocab)
#vocab = scalar.transform(vocab)

X_val = words
y_val = val_label_list

norm_X_val = scaler.transform(X_val)
y_pred = svm_clf.predict(norm_X_val)

match_count = 0
for i in range(len(y_val)):
    if y_val[i] != y_pred[i]:
        print(f'{y_val[i]}, {y_pred[i]}')
    else:
        match_count += 1
        
print(f'Matches: {match_count}; Mismatches: {len(y_val)-match_count}')

overpass, freeway
overpass, freeway
overpass, freeway
overpass, freeway
overpass, swimming_pool
overpass, forest
overpass, freeway
overpass, freeway
overpass, beach
overpass, beach
beach, freeway
beach, forest
beach, parking_space
beach, dense_residential
beach, overpass
beach, river
river, freeway
river, freeway
forest, river
forest, river
forest, river
forest, river
forest, river
forest, river
forest, river
swimming_pool, dense_residential
swimming_pool, dense_residential
swimming_pool, dense_residential
swimming_pool, dense_residential
swimming_pool, dense_residential
swimming_pool, harbor
swimming_pool, harbor
swimming_pool, beach
swimming_pool, beach
swimming_pool, dense_residential
swimming_pool, dense_residential
harbor, swimming_pool
harbor, dense_residential
harbor, swimming_pool
parking_space, forest
parking_space, beach
parking_space, beach
parking_space, freeway
parking_space, beach
freeway, river
freeway, river
freeway, river
freeway, river
freeway, river
freeway, overpass

In [14]:
import os
import numpy as np
import random
import pandas as pd
import json
import pickle
import matplotlib.pyplot as plt
from utils import *

# Important NOTE: Use opencv >=4.4 
import cv2

sample = '../data/interim/PatternNet/images' + '/' + 'parking_space' + '/' + 'parkingspace005.jpg'

img = plt.imread(sample)

# Converting image to grayscale
gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

# Applying SIFT detector
sift = cv2.SIFT_create(nfeatures=0, nOctaveLayers=3, edgeThreshold=0.2, contrastThreshold=0.07)

#kp = sift.detect(gray, None)
kp, des = sift.detectAndCompute(gray,None)

# Marking the keypoint on the image using circles
sift_img=cv2.drawKeypoints(gray, kp, img,
                          flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

plt.imshow(sift_img)

FileNotFoundError: [Errno 2] No such file or directory: '../data/interim/PatternNet/images/parking_space/parkingspace005.jpg'

In [None]:
des.shape

https://kushalvyas.github.io/BOV.html

https://www.vlfeat.org/api/sift.html#sift-tech-descriptor

https://docs.opencv.org/3.4/d7/d60/classcv_1_1SIFT.html

https://machinelearningmastery.com/opencv_sift_surf_orb_keypoints

https://github.com/kushalvyas/Bag-of-Visual-Words-Python/blob/8ddda6ab804f14777855c8f4119f749f61e2da6e/Bag.py#L140