In [1]:
import matplotlib, cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.cluster import KMeans, MiniBatchKMeans, DBSCAN
from sklearn.metrics import silhouette_score
import math
import os
import itertools
import random
import time
%matplotlib inline

In [2]:
task_2_train = "Task2Dataset/Training/png/"
task_2_test = "Task2Dataset/TestWithoutRotations/images/"
task_2_test_annotations = "Task2Dataset/TestWithoutRotations/annotations/"

task_3_test_annotations = "Task3AdditionalTestDataset/annotations/"
task_3_test = "Task3AdditionalTestDataset/images/"

In [47]:
#refactored to class structure to simplify parameter tuning and testing of different matching algorithms, outlier rejection etc
class SIFT:
    #c_thresh=0.09 to get 0.03 used by lowe as used in paper
    def __init__(self, n_octaves=3, n_feat=0, c_thresh=0.09, e_thresh=10, lowe_thresh=0.7, ransac_thresh=5.0, sigma=1.6, k=2, min_matches=10, train_folder="", test_folder="", test_annotation_folder="", matcher=cv2.FlannBasedMatcher()):
        """
        Initialise parameters, by default set to OpenCV default values.
        Allows easy testing of parameters by simply calling SIFT(n_octaves=5), for example
        """
        self.n_octaves=n_octaves #num octaves used in computing DoG pyramid
        self.n_feat=n_feat #num features to retain when getting keypoints/descriptors
        self.c_thresh=c_thresh #contrast threshold used in SIFT feature detection
        self.e_thresh=e_thresh #edge threshold in SIFT feature detection
        self.lowe_thresh=lowe_thresh #threshold for lowe's ratio test
        self.ransac_thresh=ransac_thresh #threshold used in RANSAC outlier rejection
        self.sigma=sigma #dictates level of gaussian blur in DoG pyramid
        self.k=k #number of nearest neighbours obtained in knnmatch
        self.min_matches=min_matches #number of matches required to calculate homography - kinda redundant
        self.train_folder=train_folder
        self.test_folder=test_folder
        self.test_annotation_folder=test_annotation_folder
        self.train_pts = {} # store keypoints of training images
        self.train_desc = {} # store descriptors of training images
        self.test_points = {}
        self.test_desc = {}
        self.train_feat_count = {} #store number of features found for training image for scoring mechanism
        self.matcher = matcher #FlannBasedMatcher or BFMatcher
        self.matches = [] #store retained matches
        self.homography_matrix = None #store homography matrix
        self.inlier_mask = "" #store inlier mask
        self.test_annotations = {} #store annotations for test images
        self.num_train_images=0
        self.sift = cv2.SIFT_create(self.n_feat, self.n_octaves, self.c_thresh, self.e_thresh, self.sigma)
        self.cluster_desc = []

    def get_label(self, file):
        """
        Helper function to get label from filename
        i.e. 001-this-is-the-label.png returns this-is-the-label
        """
        return file.split("-", 1)[1].split(".")[0]
    
    def get_file_name(self, file):
        """
        Helper function to get full file name barring extension
        i.e. 001-name.png returns 001-name
        """
        return file.split(".")[0]
        
    def get_sift_train_features(self):
        """
        Gets SIFT keypoints and descriptors for all images in train folder
        Stored under dictionaries for easy access using image name
        """
        
        for train_image in os.listdir(self.train_folder):
            image = cv2.imread(os.path.join(self.train_folder, train_image), 0)
            #image = cv2.medianBlur(image, 5)
            pt, desc = self.sift.detectAndCompute(image, None)
            self.train_pts[self.get_file_name(train_image)] = pt
            self.train_desc[self.get_file_name(train_image)] = desc
            self.train_feat_count[self.get_file_name(train_image)] = len(pt)
            self.num_train_images+=1

    def get_sift_test_features(self):
        for test_image in os.listdir(self.test_folder):
            image = cv2.imread(os.path.join(self.test_folder, test_image), 0)
            #image = cv2.medianBlur(image, 5)

            pt, desc = self.sift.detectAndCompute(image, None)
            test_image = self.get_file_name(test_image)
            self.test_points[test_image] = pt
            self.test_desc[test_image] = desc

    def get_annotations(self):
        """
        Get annotations for each test image
        """
        for annotation_file in os.listdir(self.test_annotation_folder):
            
            image_name = self.get_file_name(annotation_file)
            annotation_file = open(os.path.join(self.test_annotation_folder, annotation_file), "r")
            
            annotation_lines = annotation_file.readlines()
            image_annot = []
            
            for line in annotation_lines:
                image_annot.append(line.split(",")[0])
            
            self.test_annotations[image_name] = image_annot

    def is_bound_box(self,vertices):
        if np.any(vertices < 0) or np.any(vertices > 512):
            return False
        distances =[]
        for i in range(4):
            j = (i+1)%4
            distances.append(math.sqrt((vertices[i][0][0] - vertices[j][0][0])**2 + (vertices[i][0][1] - vertices[j][0][1])**2))
        
        if np.abs(distances[0] - distances[2]) > 0.8 and np.abs(distances[1] - distances[3]) > 0.8:
            return False
        
        a = [vertices[1][0][0] - vertices[0][0][0],vertices[1][0][1] - vertices[0][0][1] ]
        b = [ vertices[2][0][0] - vertices[1][0][0],vertices[2][0][1] - vertices[1][0][1]]
        dot_product = np.dot(a,b)/(np.linalg.norm(a)*np.linalg.norm(b))
        
        if np.abs(dot_product) > 0.1:
            return False
        return True

    def draw_boxes(self, train_image_name, test_image_name, homography_method, good_matches):
        assert self.train_desc, "Run SIFT.get_sift_train_features()"
        assert self.test_desc, "Run SIFT.get_sift_test_features()"
        canvas = cv2.imread(f"./{test_image_name}_boxed.png")
        train_image = cv2.imread(os.path.join(self.train_folder, train_image_name + ".png"))
        h, w = train_image.shape[:2]
        
        src_pts = np.float32([self.train_pts[self.get_file_name(train_image_name)][m[0].queryIdx].pt for m in good_matches]).reshape(-1,1,2)        
        dst_pts = np.float32([self.test_points[test_image_name][m[0].trainIdx].pt for m in good_matches]).reshape(-1,1,2)
        
        if len(src_pts) < 4 or len(dst_pts) < 4:
            return
        M, mask = cv2.findHomography(src_pts, dst_pts, homography_method, self.ransac_thresh)
        if M is None:
            return

        pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
        dst = cv2.perspectiveTransform(pts, M)
        
        canvas = cv2.polylines(canvas, [np.int32(dst)], True, (0,0,255), 2, cv2.LINE_AA)
        centroid = np.int32(dst.mean(axis=0)[0])
        centroid[0]-=55
        centroid[1]-=37
        centroid = tuple(centroid)

        cv2.putText(canvas, self.get_label(train_image_name), centroid, cv2.FONT_HERSHEY_COMPLEX, 0.4, (255,0,0), 1, cv2.LINE_AA )
        cv2.imwrite(f"./{test_image_name}_boxed.png", canvas)

    def sift_matches_lowes(self, train_image_name, test_image_name):
        """
        Match SIFT features, using Lowe's Ratio Test as a means of outlier rejection
        """

        #match features
        matches = self.matcher.knnMatch(self.train_desc[self.get_file_name(train_image_name)], self.test_desc[test_image_name], self.k)

        #lowe's ratio test
        for one, two in matches:
            if one.distance < self.lowe_thresh * two.distance:
                self.matches.append([one])

        #self.draw_boxes(train_image_name, test_image_name, 0, self.matches)
    

    def sift_matches_ransac(self, train_image_name, test_image_name):
        """
        Match SIFT features, obtain homography matrix and use RANSAC/inlier mask for outlier rejection
        """

        #match features
        matches = self.matcher.knnMatch(self.train_desc[self.get_file_name(train_image_name)], self.test_desc[test_image_name], self.k)
        if len(matches) > self.min_matches:
            #if sufficient matches to compute homography, calculate source/dest points
            src_pts = np.float32([self.train_pts[self.get_file_name(train_image_name)][m[0].queryIdx].pt for m in matches]).reshape(-1, 1, 2)
            dst_pts = np.float32([self.test_points[test_image_name][m[0].trainIdx].pt for m in matches]).reshape(-1, 1, 2)
            
            #get homography matrix + mask
            H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, self.ransac_thresh)
            self.homography_matrix = H
            inlier_mask = mask.ravel().tolist()
            #extract inlier matches
            for i in range(len(matches)):
                if inlier_mask[i]:
                    self.matches.append(matches[i])
        else:
            #cannot compute homography
            self.matches=[]

        #self.draw_boxes(train_image_name, test_image_name, 0, self.matches)


    def count_features(self, image_path):
        image = cv2.medianBlur(cv2.imread(image_path, 0), 25)
        null, threshold_image = cv2.threshold(image, 250, 255, cv2.THRESH_BINARY_INV)
        outlines, null = cv2.findContours(threshold_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        return len(outlines)

    def calc_iou(boxA_coord, boxB_coord, w, h):
    
        boxA = [boxA_coord[0],boxA_coord[1], boxA_coord[0]+w, boxA_coord[1]+h]
        boxB = [boxB_coord[0],boxB_coord[1], boxB_coord[0]+w, boxB_coord[1]+h]
        
        xA = max(boxA[0], boxB[0])
        yA = max(boxA[1], boxB[1])
        xB = min(boxA[2], boxB[2])
        yB = min(boxA[3], boxB[3])
        # compute the area of intersection rectangle
        interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
        
        
        boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
        boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
        
        iou = interArea /float(boxAArea + boxBArea - interArea)
        
        return iou
    
    def compare_scores_annot(self, scores, test_image):

        true_pos, false_pos, true_neg, false_neg = 0, 0, 0, 0
        print("----------------------------------------------------")
        print(f"Correct labels: {self.test_annotations[test_image]}")
        print(f"Our labels: {scores}")
        for train_image in scores.keys():

            if self.get_label(train_image) in self.test_annotations[test_image]:
                true_pos+=1
            else:
                false_pos+=1
                false_neg+=1
        total = true_pos+false_pos
        print(f"{test_image}: {true_pos}/{(true_pos+false_pos)}")
        print("----------------------------------------------------")

        true_neg = self.num_train_images - false_neg - true_pos - false_pos

        return true_pos, false_pos, true_neg, false_neg
    
    def eval(self, lowes=False, normalise=False):
        """
        Get score for a specific test image
        
        Normalise == False: score is purely a ranking of the number of matches between train images and a test image
        Normalise == True: normalise number of matches by number of features extracted from train image
        
        If lowes=false, use ransac. Vice versa.
        """
        tp, fp, tn, fn = 0, 0, 0, 0
        for test_image in os.listdir(self.test_folder):
            scores = {}
            canvas = cv2.imread(os.path.join(self.test_folder, test_image))
            cv2.imwrite(f"./{self.get_file_name(test_image)}_boxed.png", canvas)
            for image, desc in self.train_desc.items():
                self.matches = []

                if lowes:
                    self.sift_matches_lowes(image, self.get_file_name(test_image))
                else:
                    self.sift_matches_ransac(image, self.get_file_name(test_image))

                if normalise:
                    scores[self.get_file_name(image)] = len(self.matches) / self.train_feat_count[image]
                else:
                    scores[self.get_file_name(image)] = len(self.matches)

            scores = dict(sorted(scores.items(), key=lambda x: x[1], reverse=True))
            scores = dict(itertools.islice(scores.items(), self.count_features(os.path.join(self.test_folder, test_image))))
            # [print(key) for key in scores.keys()]
            # [self.draw_boxes(train_image, self.get_file_name(test_image), 0, self.matches) for train_image in scores.keys()]
            test_image = self.get_file_name(test_image)
            true_pos, false_pos, true_neg, false_neg = self.compare_scores_annot(scores, test_image)
            tp+=true_pos
            tn+=true_neg
            fp+=false_pos
            fn+=false_neg
            
        acc = ((tp+tn)/(tp+tn+fp+fn)) # accuracy
        tpr = (tp)/(tp+fn) # true positive rate
        fpr = (fp)/(fp+tn) # false positive rate
        return acc, tpr, fpr


In [17]:
import cv2
import numpy as np

def count_objects(image_path):
    # Read the image
    gray = cv2.imread(image_path, 0)
    gray = cv2.medianBlur(gray, 25)
    # Convert the image to grayscale

    # Threshold the image to create a binary image
    _, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY_INV)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw contours on the original image
    result = gray.copy()
    for contour in contours:
        cv2.drawContours(result, [contour], -1, (0, 255, 0), 2)

   # Show the original image and the image with contours if running in an environment that supports GUIs
    if 'DISPLAY' in os.environ:
        cv2.imshow('Image with Contours', result)
        # Add a small delay to give the window some time to appear
        cv2.waitKey(5000)
        cv2.destroyAllWindows()

    # # Close the image windows when a key is pressed
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Return the number of objects
    return len(contours)

# Replace 'image_path' with the path to your image
for test_image in os.listdir("./Task2Dataset/TestWithoutRotations/images/"):
    print(test_image, count_objects(os.path.join("./Task2Dataset/TestWithoutRotations/images/", test_image)))
image_path = './Task3AdditionalTestDataset/images/test_image_10.png'
num_objects = count_objects(image_path)
print(f"Number of objects: {num_objects}")


test_image_1.png 4
test_image_10.png 5
test_image_11.png 4
test_image_12.png 5
test_image_13.png 5
test_image_14.png 4
test_image_15.png 4
test_image_16.png 5
test_image_17.png 5
test_image_18.png 5
test_image_19.png 4
test_image_2.png 5
test_image_20.png 4
test_image_3.png 4
test_image_4.png 5
test_image_5.png 5
test_image_6.png 4
test_image_7.png 5
test_image_8.png 5
test_image_9.png 4
Number of objects: 4


In [5]:
matcher, outlier_method, test_set, normalised, acc, prec, rec, f1 = [], [], [], [], [], [], [], []

In [52]:
# FLANN with RANSAC 

sift = SIFT(train_folder=task_2_train, 
            test_folder=task_3_test, 
            test_annotation_folder=task_3_test_annotations, 
            matcher=cv2.FlannBasedMatcher_create())

sift.get_annotations()
sift.get_sift_train_features()
sift.get_sift_test_features()
sift.eval(normalise=True)
# acc.append(accuracy), prec.append(precision), rec.append(recall), f1.append(f1_score), matcher.append("Brute Force"), outlier_method.append("RANSAC"), test_set.append(task_2_test), normalised.append(True)

----------------------------------------------------
Correct labels: ['barn', 'hospital', 'university', 'hotel']
Our labels: {'028-government': 0.75, '008-courthouse': 0.5142857142857142, '011-trash': 0.4861111111111111, '027-gas-station': 0.48}
test_image_1: 0/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['post-office', 'police', 'fire-station', 'lighthouse']
Our labels: {'001-lighthouse': 0.5348837209302325, '028-government': 0.5, '015-barn': 0.47619047619047616, '046-fire-station': 0.4444444444444444}
test_image_10: 2/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['supermarket', 'bus-stop', 'church', 'bus']
Our labels: {'011-trash': 0.5555555555555556, '006-church': 0.5, '008-courthouse': 0.42857142857142855, '028-government': 0.4166666666666667}
test_image_11: 1/4
----------------------------------------------------
-----------

(0.864, 0.20930232558139536, 0.07439824945295405)

In [55]:
#bf with lowes
sift = SIFT(train_folder=task_2_train, 
            test_folder=task_2_test, 
            test_annotation_folder=task_2_test_annotations, 
            matcher=cv2.BFMatcher_create())

sift.get_annotations()
sift.get_sift_train_features()
sift.get_sift_test_features()
sift.eval(normalise=True, lowes=False)
# acc.append(accuracy), prec.append(precision), rec.append(recall), f1.append(f1_score), matcher.append("Brute Force"), outlier_method.append("lowes"), test_set.append(task_2_test), normalised.append(True)

----------------------------------------------------
Correct labels: ['gas-station', 'trash', 'theater', 'house']
Our labels: {'011-trash': 0.7222222222222222, '008-courthouse': 0.4857142857142857, '027-gas-station': 0.48, '045-museum': 0.46875}
test_image_1: 2/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['factory', 'hotel', 'university', 'bank', 'cinema']
Our labels: {'028-government': 0.5, '039-university': 0.5, '036-hotel': 0.43636363636363634, '010-bench': 0.4230769230769231, '011-trash': 0.4166666666666667}
test_image_10: 2/5
----------------------------------------------------
----------------------------------------------------
Correct labels: ['supermarket', 'post-office', 'bridge', 'van']
Our labels: {'011-trash': 0.8194444444444444, '028-government': 0.5, '045-museum': 0.5, '015-barn': 0.4880952380952381}
test_image_11: 0/4
----------------------------------------------------
---------------------

KeyboardInterrupt: 

In [None]:
print(precision)

0.8791208791208791


In [54]:
#bf with lowes
ac=[]
for parameter in np.arange(0.0, 0.5, 0.01):
    sift = SIFT(train_folder=task_2_train, 
                test_folder=task_2_test, 
                test_annotation_folder=task_2_test_annotations, 
                matcher=cv2.BFMatcher_create(),
                e_thresh=parameter)

    sift.get_annotations()
    sift.get_sift_train_features()
    sift.get_sift_test_features()
    sift.eval(normalise=True, lowes=False)
    # ac.append(accuracy), prec.append(precision), rec.append(recall), f1.append(f1_score), matcher.append("Brute Force"), outlier_method.append("lowes"), test_set.append(task_2_test), normalised.append(True)

----------------------------------------------------
Correct labels: ['gas-station', 'trash', 'theater', 'house']
Our labels: {'031-field': 0.47368421052631576, '041-windmill': 0.4552683896620278, '050-cemetery': 0.4444444444444444, '027-gas-station': 0.42105263157894735}
test_image_1: 1/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['factory', 'hotel', 'university', 'bank', 'cinema']
Our labels: {'011-trash': 0.5952380952380952, '041-windmill': 0.5765407554671969, '006-church': 0.5348837209302325, '036-hotel': 0.4642857142857143, '039-university': 0.44}
test_image_10: 2/5
----------------------------------------------------
----------------------------------------------------
Correct labels: ['supermarket', 'post-office', 'bridge', 'van']
Our labels: {'011-trash': 0.7857142857142857, '006-church': 0.5348837209302325, '032-van': 0.4765625, '022-car': 0.46296296296296297}
test_image_11: 1/4
-------------------

KeyboardInterrupt: 

In [None]:
print(prec)

[0.31868131868131866, 0.08791208791208792, 0.08791208791208792, 0.08791208791208792, 0.0989010989010989, 0.25274725274725274, 0.42857142857142855, 0.5824175824175825, 0.7142857142857143, 0.8351648351648352, 0.8351648351648352, 0.8461538461538461, 0.8791208791208791, 0.8791208791208791, 0.8901098901098901, 0.8901098901098901, 0.9120879120879121, 0.9230769230769231, 0.945054945054945, 0.945054945054945, 0.945054945054945, 0.9560439560439561, 0.9230769230769231, 0.9340659340659341, 0.9230769230769231, 0.8901098901098901, 0.8901098901098901, 0.9010989010989011, 0.8901098901098901, 0.8791208791208791, 0.8791208791208791, 0.8681318681318682, 0.8351648351648352, 0.8351648351648352, 0.8241758241758241, 0.8131868131868132, 0.8021978021978022, 0.7802197802197802, 0.7692307692307693, 0.7472527472527473, 0.7252747252747253, 0.7142857142857143, 0.7472527472527473, 0.6703296703296703, 0.7032967032967034, 0.6703296703296703, 0.6153846153846154, 0.5604395604395604, 0.5164835164835165, 0.48351648351648

In [None]:
#bf with ransac
sift = SIFT(train_folder=task_2_train, 
            test_folder=task_2_test, 
            test_annotation_folder=task_2_test_annotations, 
            matcher=cv2.BFMatcher_create())

sift.get_annotations()
sift.get_sift_train_features()
sift.get_sift_test_features()
sift.eval(normalise=True)

----------------------------------------------------
Correct labels: ['gas-station', 'trash', 'theater', 'house']
Our labels: {'trash': 0.7222222222222222, 'courthouse': 0.4857142857142857, 'gas-station': 0.48, 'museum': 0.46875}
test_image_1: 2/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['factory', 'hotel', 'university', 'bank', 'cinema']
Our labels: {'government': 0.5, 'university': 0.5, 'hotel': 0.43636363636363634, 'bench': 0.4230769230769231, 'trash': 0.4166666666666667}
test_image_10: 2/5
----------------------------------------------------
----------------------------------------------------
Correct labels: ['supermarket', 'post-office', 'bridge', 'van']
Our labels: {'trash': 0.8194444444444444, 'government': 0.5, 'museum': 0.5, 'barn': 0.4880952380952381}
test_image_11: 0/4
----------------------------------------------------
----------------------------------------------------
Correct labels: ['fl

(0.868, 0.27472527472527475, 0.27472527472527475, 0.27472527472527475)

In [None]:
#flann with lowes
sift = SIFT(train_folder=task_2_train, 
            test_folder=task_2_test, 
            test_annotation_folder=task_2_test_annotations, 
            matcher=cv2.FlannBasedMatcher_create())

sift.get_annotations()
sift.get_sift_train_features()
sift.get_sift_test_features()
sift.eval(normalise=True ,lowes=True)

['gas-station', 'trash', 'theater', 'house']
{'gas-station': 0.48, 'cemetery': 0.4, 'house': 0.32558139534883723, 'museum': 0.28125}
-----------------------------
test_image_1 : 2 / 4
['factory', 'hotel', 'university', 'bank', 'cinema']
{'hotel': 0.38181818181818183, 'bank': 0.38095238095238093, 'university': 0.29545454545454547, 'factory': 0.2641509433962264, 'cinema': 0.171875}
-----------------------------
test_image_10 : 5 / 5
['supermarket', 'post-office', 'bridge', 'van']
{'supermarket': 0.3333333333333333, 'post-office': 0.32558139534883723, 'bridge': 0.2535211267605634, 'van': 0.23404255319148937}
-----------------------------
test_image_11 : 4 / 4
['flower', 'cemetery', 'traffic-light', 'fountain', 'ferris-wheel']
{'trash': 0.375, 'fountain': 0.3333333333333333, 'ferris-wheel': 0.3028169014084507, 'tractor': 0.26582278481012656, 'flower': 0.24390243902439024}
-----------------------------
test_image_12 : 3 / 5
['government', 'telephone-booth', 'car', 'shop', 'cemetery']
{'tele

(0.964, 0.8021978021978022, 0.8021978021978022, 0.8021978021978022)