###### Disclaimer
This notebook contains experiments not included in the thesis.

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
import os
import PIL
import PIL.Image
import pathlib
import math
import copy

## Preprocessing

In [None]:
 def downscale_images(images, scale=30):
     """
     Resizes every image in a sequence to a percentage of its original size.

     Args:
         images: iterable of images (arrays exposing ``shape`` as
             ``(height, width, ...)``) to be resized
         scale: target size as a percentage of the original width and height
             (30 means 30 % of the original size); must be greater than zero

     Returns: list of resized images, in the same order as ``images``

     Raises:
         ValueError: if ``scale`` is not positive or if an entry of ``images``
             is ``None`` (which is what ``cv2.imread`` returns for a file it
             cannot read)
     """
     if scale <= 0:
         raise ValueError(f"scale must be a positive percentage, got {scale}")

     resized = []

     for position, img in enumerate(images):
         if img is None:
             raise ValueError(f"Image at position {position} is None (unreadable image file?)")

         # Clamp to one pixel so that very small images never end up with an
         # empty target size, which cv2.resize rejects.
         width = max(1, int(img.shape[1] * scale / 100))
         height = max(1, int(img.shape[0] * scale / 100))

         # cv2.resize expects the target size as (width, height)
         resized.append(cv2.resize(img, (width, height), interpolation = cv2.INTER_AREA))

     return resized


In [None]:
def load_and_preprocess(path):
    """
    Loads all images from a directory, downscales them and converts them to
    grayscale and RGB.

    The images are ordered by the integer found between the last ``_`` of the
    file name and the first ``.`` after it (e.g. ``painting_12.jpg`` -> 12).

    Args:
        path: directory containing the images

    Returns: tuple ``(paintings_gray, paintings_rgb)`` of two lists holding the
        downscaled grayscale and RGB version of every image, in the same order

    Raises:
        ValueError: if a file name does not yield an image number or if a file
            cannot be read as an image
    """
    # directory with the images
    data_dir = pathlib.Path(path)

    # list the directory once; the same listing is used for counting and loading
    image_paths = list(data_dir.glob('*'))
    print(f"Number of images found in {path}: {len(image_paths)}")

    def image_number(image_path):
        # Parse the file name only, so that '_' or '.' in a directory name
        # (including a leading '../') cannot end up in the sort key.
        return int(image_path.name.split('_')[-1].split('.')[0])

    # sort by img number
    image_paths.sort(key=image_number)

    # loading images, in BGR
    paintings = []
    for image_path in image_paths:
        painting = cv2.imread(str(image_path))
        if painting is None:
            # cv2.imread signals failure by returning None instead of raising
            raise ValueError(f"Could not read image: {image_path}")
        paintings.append(painting)

    paintings = downscale_images(paintings)

    # converting to gray
    paintings_gray = [cv2.cvtColor(painting, cv2.COLOR_BGR2GRAY) for painting in paintings]

    # converting to RGB for later visualisation
    paintings_rgb = [cv2.cvtColor(painting, cv2.COLOR_BGR2RGB) for painting in paintings]

    return (paintings_gray, paintings_rgb)


## Loading stolen paintings

In [None]:
# Directory with the paintings that form the searchable database of stolen art.
stolen_art_dir_path = "../datasets/Tate-500-oil-canvas/"
# Tuple (grayscale images, RGB images) as returned by load_and_preprocess.
stolen_paintings = load_and_preprocess(stolen_art_dir_path)


## Loading found paintings
Paintings that are searched for in the database of stolen paintings.

In [None]:
# NOTE: this is the same directory as the stolen paintings, so every found
# painting is searched for in a database that contains that very image.
found_paintings_dir_path = "../datasets/Tate-500-oil-canvas/" 
# Tuple (grayscale images, RGB images) as returned by load_and_preprocess.
found_paintings = load_and_preprocess(found_paintings_dir_path)

In [None]:
def display_compared(img1, img2):
    """
    Shows two images side by side for visual comparison.

    Args:
        img1: image shown on the left under the title "Stolen Image"
        img2: image shown on the right under the title "Comparing to"
    """
    _, axes = plt.subplots(1, 2, figsize=(20,10))

    panels = (("Stolen Image", img1), ("Comparing to", img2))
    for axis, (title, image) in zip(axes, panels):
        axis.set_title(title)
        axis.imshow(image)

## Scaling functions for visualisation

Helper functions for downscaling the paintings and their keypoints before visualisation. Drawing downscaled images greatly reduces the size of the saved notebook.

In [None]:
def features_deepcopy(f):
    """
    Creates independent copies of OpenCV keypoints.

    Args:
        f: iterable of ``cv2.KeyPoint`` objects

    Returns: list of new ``cv2.KeyPoint`` objects with the same ``pt``,
        ``size``, ``angle``, ``response``, ``octave`` and ``class_id``
    """
    # The arguments are passed positionally on purpose: the keyword names of
    # the KeyPoint constructor are not the same in every OpenCV release
    # (``_size`` vs ``size`` etc.), while the positional order is stable.
    return [cv2.KeyPoint(k.pt[0], k.pt[1], k.size, k.angle,
                         k.response, k.octave, k.class_id) for k in f]

In [None]:
def scale_keypoints(keypoints, scale):
    """
    Rescales the coordinates of keypoints in place.

    Args:
        keypoints: iterable of keypoints; the ``pt`` attribute of each one is
            read and overwritten
        scale: percentage the coordinates are multiplied by (50 halves them)

    Returns: nothing; the scaled coordinates are truncated towards zero and
        stored back on the keypoints that were passed in
    """
    for kp in keypoints:
        x, y = kp.pt[0], kp.pt[1]
        kp.pt = (math.trunc(x * (scale/100)), math.trunc(y * (scale/100)))
    

In [None]:
def draw_matches_scaled(found_painting, found_painting_keypoints,
                        stolen_painting, stolen_painting_keypoints,
                        matches, matchesMask, draw_params, scale = 20):
    """
    Draws k-NN matches between a found and a stolen painting on downscaled
    copies of both images.

    Args:
        found_painting: image of the painting that was searched for
        found_painting_keypoints: keypoints detected on ``found_painting``
        stolen_painting: image of the painting from the database
        stolen_painting_keypoints: keypoints detected on ``stolen_painting``
        matches: result of ``knnMatch`` called with the stolen descriptors as
            the query set and the found descriptors as the train set (this is
            the order used by ``get_best_match``)
        matchesMask: mask selecting the matches to draw; only used when
            ``draw_params`` does not already contain a ``matchesMask`` entry
        draw_params: keyword arguments forwarded to ``cv2.drawMatchesKnn``
        scale: size of the drawn images as a percentage of the originals

    Returns: image produced by ``cv2.drawMatchesKnn`` with the stolen painting
        on the left and the found painting on the right
    """
    # scale the paintings
    found_painting = downscale_images([found_painting], scale)[0]
    stolen_painting = downscale_images([stolen_painting], scale)[0]

    # scale copies of the keypoints so that the callers' keypoints stay untouched
    found_keypoints_scaled = features_deepcopy(found_painting_keypoints)
    scale_keypoints(found_keypoints_scaled, scale)

    stolen_keypoints_scaled = features_deepcopy(stolen_painting_keypoints)
    scale_keypoints(stolen_keypoints_scaled, scale)

    params = dict(draw_params)
    if matchesMask is not None:
        params.setdefault("matchesMask", matchesMask)

    # drawMatchesKnn looks up queryIdx in the first keypoint list and trainIdx
    # in the second one. The stolen painting is the query side of `matches`,
    # so it has to be passed first; otherwise the lines connect wrong keypoints.
    return cv2.drawMatchesKnn(stolen_painting, stolen_keypoints_scaled,
                              found_painting, found_keypoints_scaled,
                              matches, None, **params)

    

# ORB

In [None]:
# Creating the ORB object used for all keypoint detection and descriptor
# computation in this notebook; nfeatures is set to 500.
orb = cv2.ORB_create(nfeatures=500)

In [None]:
def visualize_keypoints(image, keypoints, scale = 100):
    """
    Plots the keypoints of an image twice: drawn with their size (left) and
    as plain points (right).

    Args:
        image: image the keypoints were detected on
        keypoints: keypoints to draw; they are copied before being scaled, so
            the caller's keypoints are not modified
        scale: size of the drawn image as a percentage of the original
    """
    # Draw on the downscaled image so that the image and the scaled keypoint
    # coordinates refer to the same pixel grid.
    scaled_image = downscale_images([image], scale)[0]

    keypoints_scaled = features_deepcopy(keypoints)
    scale_keypoints(keypoints_scaled, scale)
    # NOTE(review): scale_keypoints only rescales the coordinates, so the
    # circles of the "with size" plot keep their unscaled diameter.

    # Use the returned image instead of a pre-allocated output buffer:
    # a buffer whose shape or type does not fit is not drawn into.
    keypoints_without_size = cv2.drawKeypoints(scaled_image, keypoints_scaled, None,
                                               color = (0, 255, 0))

    keypoints_with_size = cv2.drawKeypoints(scaled_image, keypoints_scaled, None,
                                            flags = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

    # Display image with and without keypoints size
    fx, plots = plt.subplots(1, 2, figsize=(20,10))

    plots[0].set_title("Train keypoints With Size")
    plots[0].imshow(keypoints_with_size)

    plots[1].set_title("Train keypoints Without Size")
    plots[1].imshow(keypoints_without_size)

    print("Number of Keypoints Detected In The Stolen Image: ", len(keypoints))

## Running the algorithm

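Find keypoints and compute their descriptors for every image in the database. The keypoints of a single image can be displayed with `visualize_keypoints`, which shows them with their size (left) and without it (right).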

In [None]:
# ORB results for the stolen paintings, in database order: one tuple returned
# by orb.detectAndCompute (unpacked below as keypoints, descriptors) per painting.
orb_results = []

for index, _ in enumerate(stolen_paintings[0]):

    # stolen_paintings[0] holds the grayscale versions of the paintings
    orb_res = orb.detectAndCompute(stolen_paintings[0][index], None)
    orb_results.append(orb_res)

    # NOTE(review): unpacked but not used in this cell; after the loop these
    # names (and `index`) stay defined at module level with the last values.
    sp_keypoints, sp_descriptors = orb_res

# Searching for a painting in the database

In [None]:
def search_for_painting(painting_index, draw_match = True, hist_check = False,
                        min_hist_corr = 0.02, max_retries = 10):
    """
    Searches the database of stolen paintings for the found painting with the
    given index.

    Args:
        painting_index: index of the painting in ``found_paintings``
        draw_match: when True, the matches with the best candidate are drawn
            (or, when there is no candidate, a message is printed and the
            searched painting is shown)
        hist_check: when True, a candidate whose colour-histogram correlation
            with the searched painting is not above ``min_hist_corr`` is
            rejected and the search is repeated without it. Disabled by
            default.
        min_hist_corr: correlation a candidate has to exceed to be accepted
            when ``hist_check`` is enabled
        max_retries: maximum number of candidates rejected by the histogram
            check; the candidate found after the last rejection is accepted
            without a further check

    Returns: index of the best match in the database, or -1 when no match was
        found (regardless of ``draw_match``)
    """
    found_painting = found_paintings[0][painting_index]
    found_painting_rgb = found_paintings[1][painting_index]
    found_painting_keypoints, found_painting_descriptor = orb.detectAndCompute(found_painting, None)

    # search for nearest ORB painting
    best_match_index, matches, mask = get_best_match(found_painting_descriptor)

    def hist_corr_with(stolen_index):
        # correlation between the histograms of the searched painting and a candidate
        return compare_hists(found_paintings_hists[painting_index],
                             stolen_paintings_hists[stolen_index])

    if hist_check and best_match_index >= 0:
        hist_corr = hist_corr_with(best_match_index)

        # indexes of paintings in DB which will not be matched with current painting again
        excluded_indexes = []
        tried = 0
        while best_match_index >= 0 and hist_corr <= min_hist_corr and tried < max_retries:
            previous, previous_hc = best_match_index, hist_corr

            excluded_indexes.append(previous)
            best_match_index, matches, mask = get_best_match(found_painting_descriptor,
                                                             exclude=excluded_indexes)
            hist_corr = hist_corr_with(best_match_index) if best_match_index >= 0 else 0

            print(f"Previous match for {painting_index}: {previous}, hist_corr: {previous_hc}. "
                  f"New match: {best_match_index} with hist_corr: {hist_corr}")

            tried = tried + 1

    if best_match_index < 0:
        if draw_match:
            print(f"Sorry, no match found for painting: {painting_index}")
            plt.imshow(found_paintings[1][painting_index])
        return best_match_index

    if draw_match:
        draw_matches(best_match_index, found_painting_rgb, found_painting_keypoints, matches, mask)

    return best_match_index
    

## Techniques to improve accuracy

### Histogram comparison

In [None]:
from scipy.spatial import distance as dist

def calculate_histograms(images, bin_size = 32):
    """
    Computes a normalised, flattened histogram over channels 0, 1 and 2 of
    every image.

    Args:
        images: iterable of images
        bin_size: number of histogram bins used for each of the three channels

    Returns: list with one flattened histogram per image, in input order
    """
    raw_hists = []
    for image in images:
        bins = [bin_size] * 3
        value_ranges = [0, 256] * 3
        raw_hists.append(cv2.calcHist([image], [0, 1, 2], None, bins, value_ranges))

    flattened = []
    for hist in raw_hists:
        # the histogram is passed as source and destination of cv2.normalize
        flattened.append(cv2.normalize(hist, hist).flatten())

    return flattened

In [None]:
# Histograms of the RGB versions (index 1 of the tuples) of all found and
# stolen paintings, in the same order as the paintings themselves.
found_paintings_hists = calculate_histograms(found_paintings[1])
stolen_paintings_hists = calculate_histograms(stolen_paintings[1])

In [None]:
def compare_hists(hist1, hist2):
    """
    Compares two histograms with OpenCV's correlation method.

    Args:
        hist1: first histogram
        hist2: second histogram

    Returns: result of ``cv2.compareHist`` with ``cv2.HISTCMP_CORREL``
    """
    return cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)

## FLANN Based Matcher

In [None]:
# Algorithm identifier passed to FLANN to select its LSH index.
FLANN_INDEX_LSH = 6
# Index parameters handed to cv2.FlannBasedMatcher in get_best_match.
# The trailing comments are presumably alternative values that were tried or
# considered; they are not in effect.
index_params= dict(algorithm = FLANN_INDEX_LSH,
                   table_number = 6, # 12
                   key_size = 12,     # 20
                   multi_probe_level = 1) #2

In [None]:
# Search parameters handed to cv2.FlannBasedMatcher in get_best_match.
search_params = dict(checks=50)   # or pass empty dictionary

In [None]:
def get_best_match(found_painting_descriptor, exclude=None, k = 2, min_matches = 10):
    """
    Finds the stolen painting whose ORB descriptors match the given
    descriptors best.

    Every entry of ``orb_results`` is matched against the found painting with
    a FLANN k-NN search (stolen descriptors are the query set, the found
    descriptors the train set). Matches passing Lowe's ratio test (best
    distance below 0.7 times the second best) are counted and the painting
    with the highest count wins; on a tie the painting with the lower index
    is kept.

    Args:
        found_painting_descriptor: descriptors of the painting searched for
        exclude: indexes of stolen paintings to skip; None means no exclusions
        k: number of nearest neighbours requested per descriptor
        min_matches: number of good matches the winner needs to be accepted

    Returns: tuple ``(best_match_index, matches, matches_mask)`` where
        ``matches`` is the k-NN result for the winner and ``matches_mask``
        has one row of ``k`` flags per match, with the first flag set to 1
        for good matches. ``(-1, None, None)`` is returned when no painting
        reaches ``min_matches``.
    """
    if exclude is None:
        exclude = ()

    no_match = (-1, None, None)

    # Without enough features on the searched painting nothing can be matched;
    # this does not depend on the stolen painting, so check it only once.
    if found_painting_descriptor is None or len(found_painting_descriptor) <= k:
        return no_match

    # One matcher is enough: it is only used through knnMatch calls that get
    # both descriptor sets passed in explicitly.
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    best_index, best_count, best_matches, best_mask = -1, 0, None, None

    for index, stolen_res in enumerate(orb_results):
        stolen_keypoints, stolen_descriptor = stolen_res

        if index in exclude:
            continue

        # if there is not enough features, continue
        if stolen_descriptor is None or len(stolen_descriptor) <= k:
            continue

        knn_matches = flann.knnMatch(stolen_descriptor, found_painting_descriptor, k)

        # Need to draw only good matches, so create a mask
        mask = [[0] * k for _ in knn_matches]
        ok_matches_num = 0

        # ratio test as per Lowe's paper
        for i, candidates in enumerate(knn_matches):
            if len(candidates) < 2:
                continue # not enough neighbours for the ratio test

            if candidates[0].distance < 0.7 * candidates[1].distance:
                mask[i][0] = 1
                ok_matches_num = ok_matches_num + 1

        # strictly greater: the first painting reaching a count stays the winner
        if ok_matches_num > best_count:
            best_index, best_count = index, ok_matches_num
            best_matches, best_mask = knn_matches, mask

    if best_count < min_matches:
        return no_match

    return best_index, best_matches, best_mask

In [None]:
def draw_matches(stolen_index, found_painting, found_painting_keypoints, matches, matchesMask):
    """
    Draws and shows the matches between a found painting and the stolen
    painting with the given database index.

    Args:
        stolen_index: index of the stolen painting in the database
        found_painting: image of the found painting
        found_painting_keypoints: keypoints detected on ``found_painting``
        matches: k-NN matches between the two paintings
        matchesMask: mask selecting which of the matches are drawn
    """
    draw_params = {
        "matchColor": (0,255,0),
        "singlePointColor": (255,0,0),
        "matchesMask": matchesMask,
        "flags": cv2.DrawMatchesFlags_DEFAULT,
    }

    # RGB image and keypoints of the stolen painting from the database
    stolen_painting_rgb = stolen_paintings[1][stolen_index]
    stolen_keypoints = orb_results[stolen_index][0]

    # scale 100 keeps the paintings at the size they were loaded with
    result = draw_matches_scaled(found_painting, found_painting_keypoints,
                                 stolen_painting_rgb, stolen_keypoints,
                                 matches, matchesMask, draw_params, 100)

    # Display the best matching points
    plt.rcParams['figure.figsize'] = [28.0, 14.0]
    plt.title('Painting found! Best match: ')
    plt.imshow(result)
    plt.show()

# Testing the algorithm on all the paintings

In [None]:
# Search the database for every found painting and sort the outcome into
# three dictionaries that map the searched index to the index that was returned.
total_searched = len(found_paintings[0])
not_found = {}      # no match was returned
found_ok = {}       # the returned index equals the searched index
found_wrong = {}    # a different painting was returned

# histogram correlations between the searched painting and its match,
# split by whether the match was the correct one
histograms_good_match = []
histograms_bad_match = []

for index in range(total_searched):

    best_match_index = search_for_painting(index, draw_match = False)

    # progress output for every 10th painting only
    if index % 10 == 0:
        print(f"Best match for {index} is {best_match_index}")

    # The outcome of the search is what decides "not found", not the loop index.
    # There is no matched painting to compare histograms with in this case
    # (index -1 would silently pick the last histogram).
    if best_match_index is None or best_match_index < 0:
        not_found[index] = best_match_index
        continue

    hist_corr = compare_hists(found_paintings_hists[index], stolen_paintings_hists[best_match_index])

    if index == best_match_index:
        found_ok[index] = best_match_index
        histograms_good_match.append(hist_corr)
    else:
        found_wrong[index] = best_match_index
        histograms_bad_match.append(hist_corr)



In [None]:
# Number of correct matches, of paintings without a match and of wrong matches
print(len(found_ok))
print(len(not_found))
print(len(found_wrong))

total_evaluated = len(found_ok) + len(not_found) + len(found_wrong)
# avoid a ZeroDivisionError when no painting was evaluated (e.g. empty dataset directory)
accuracy = len(found_ok) / total_evaluated if total_evaluated else 0.0
print(f"Accuracy: {accuracy*100} %")

In [None]:
# Length of one orb_results entry, i.e. of the tuple returned by
# orb.detectAndCompute for the second stolen painting.
# NOTE(review): this is not the number of keypoints; that would be len(orb_results[1][0]).
len(orb_results[1])

#### Histograms vs ORB analysis
How correlated were the histograms depending on whether ORB matched them correctly or not?

In [None]:
import pandas as pd
import numpy as np


# Summary statistics of the histogram correlations of correctly matched paintings
hists_g = pd.DataFrame(histograms_good_match).rename(columns={0:"Good"})
print(hists_g.describe())

# Summary statistics of the histogram correlations of the remaining matches
hists_b = pd.DataFrame(histograms_bad_match).rename(columns={0:"Bad"})
print(hists_b.describe())

### Visualizing results

In [None]:
def visualize_image_tuples(index_dict, limit=20):
    """
    Shows searched paintings next to the paintings they were matched with.

    Args:
        index_dict: dictionary mapping the index of a searched (found)
            painting to the index of the stolen painting it was matched with
        limit: maximum number of pairs to show
    """
    for shown, (index_searched, index_found) in enumerate(index_dict.items()):
        # stop before drawing, so that exactly `limit` pairs are shown at most
        if shown >= limit:
            break

        fx, plots = plt.subplots(1, 2, figsize=(20,10))

        plots[0].set_title("Searched image:")
        plots[0].imshow(found_paintings[1][index_searched])

        plots[1].set_title("Wrongly matched with:")
        plots[1].imshow(stolen_paintings[1][index_found])
        


In [None]:
#visualize_image_tuples(found_wrong)