## SURF (Speeded-Up Robust Features)

### Import resources and display image

In [None]:
!pip3 install opencv-python

In [None]:
!pip install scikit-image

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
from skimage import io
import time

%matplotlib inline

In [None]:
# Load the image (converted from BGR to RGB right below)
image1 = cv2.imread('data/train_images/0002cc93b.jpg')

# Convert the training image to RGB
training_image = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)

# Convert the training image to gray scale
training_gray = cv2.cvtColor(training_image, cv2.COLOR_RGB2GRAY)

# Create a test image for checking scale and rotation invariance:
# downsample the training image twice with pyrDown ...
test_image = cv2.pyrDown(training_image)
test_image = cv2.pyrDown(test_image)
num_rows, num_cols = test_image.shape[:2]

# ... then rotate it by 30 degrees around its centre (scale factor 1)
rotation_matrix = cv2.getRotationMatrix2D((num_cols/2, num_rows/2), 30, 1)
test_image = cv2.warpAffine(test_image, rotation_matrix, (num_cols, num_rows))

test_gray = cv2.cvtColor(test_image, cv2.COLOR_RGB2GRAY)

# Display training image and testing image
fx, plots = plt.subplots(2, 1, figsize=(20,10))

plots[0].set_title("Training Image")
plots[0].imshow(training_image)

plots[1].set_title("Testing Image")
plots[1].imshow(test_image)

### Detect keypoints and Create Descriptor

In [None]:
# Create the SURF detector; the argument 400 is the Hessian threshold
surf = cv2.xfeatures2d.SURF_create(400)

# Detect keypoints and compute descriptors on both gray-scale images
train_keypoints, train_descriptor = surf.detectAndCompute(training_gray, None)
test_keypoints, test_descriptor = surf.detectAndCompute(test_gray, None)

# Copies of the training image that receive the keypoint drawings
keypoints_without_size = np.copy(training_image)
keypoints_with_size = np.copy(training_image)

# Plain keypoint markers, drawn in green
cv2.drawKeypoints(training_image, train_keypoints, keypoints_without_size, color = (0, 255, 0))

# "Rich" keypoints, drawn via the DRAW_RICH_KEYPOINTS flag
cv2.drawKeypoints(training_image, train_keypoints, keypoints_with_size, flags = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# Display image with and without keypoints size
fx, plots = plt.subplots(2, 1, figsize=(20,10))

plots[0].set_title("Train keypoints With Size")
plots[0].imshow(keypoints_with_size, cmap='gray')

plots[1].set_title("Train keypoints Without Size")
plots[1].imshow(keypoints_without_size, cmap='gray')

# Print the number of keypoints detected in the training image
print("Number of Keypoints Detected In The Training Image: ", len(train_keypoints))

# Print the number of keypoints detected in the test image
# (the printed message calls it the "query" image)
print("Number of Keypoints Detected In The Query Image: ", len(test_keypoints))

### Matching Keypoints

In [None]:
# Create a Brute Force Matcher object (L1 norm, cross-checking disabled).
bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck = False)

# Perform the matching between the SURF descriptors of the training image and the test image
matches = bf.match(train_descriptor, test_descriptor)

# The matches with shorter distance are the ones we want, so sort ascending by distance.
matches = sorted(matches, key = lambda x : x.distance)

# NOTE(review): the sixth positional argument is presumably drawMatches' output image;
# passing `test_gray` there looks unintended — confirm.
result = cv2.drawMatches(training_image, train_keypoints, test_gray, test_keypoints, matches, test_gray, flags = 2)

# Display the best matching points
plt.rcParams['figure.figsize'] = [14.0, 7.0]
plt.title('Best Matching Points')
plt.imshow(result)
plt.show()

# Print total number of matching points between the training and test images
print("\nNumber of Matching Keypoints Between The Training and Query Images: ", len(matches))

In [None]:
# Show the gray-scale training image array as the cell output
training_gray

In [None]:
# Re-read the sample image (second argument 0 — presumably cv2.IMREAD_GRAYSCALE; confirm)
img = cv2.imread('data/train_images/0002cc93b.jpg',0)
# Create SURF object. You can specify params here or later.
# Here I set Hessian Threshold to 400
surf = cv2.xfeatures2d.SURF_create(400)
# Find keypoints and descriptors directly
kp, des = surf.detectAndCompute(img,None)
# Number of keypoints found (shown as the cell output)
len(kp)

In [None]:
# Check the Hessian threshold currently set on the SURF object
print( surf.getHessianThreshold() )

In [None]:
# Set the Hessian threshold to 300.
# In actual cases, it is better to have a value 300-500
surf.setHessianThreshold(300)
# Again compute keypoints and check its number.
kp, des = surf.detectAndCompute(img,None)
print( len(kp) )


In [None]:
# Draw the current keypoints onto `img` with colour (255,0,0).
# NOTE(review): the flag value 4 presumably equals
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS — confirm.
img2 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)
plt.imshow(img2),plt.show()

In [None]:
# Find the size (number of dimensions) of a descriptor
print( surf.descriptorSize() )

In [None]:
# Read the `extended` flag of the SURF object; presumably False at this point,
# since the next cell switches it on to get 128-dim descriptors.
surf.getExtended()

In [None]:
# Set the `extended` flag to True to get 128-dim descriptors,
# then recompute keypoints/descriptors and print the new descriptor size.
surf.setExtended(True)
kp, des = surf.detectAndCompute(img,None)
print( surf.descriptorSize() )

In [None]:
# Shape of the descriptor array returned by the last detectAndCompute call
print( des.shape )

In [None]:
# Draw the keypoints recomputed with the extended flag onto `img` and show them
img3 = cv2.drawKeypoints(img,kp,None,(255,0,0),4)
plt.imshow(img3),plt.show()

---

### SURF-Attributes

In [None]:
# Inspect the attributes of the first keypoint in `kp`:
# position (`pt`), `size` and `angle`
print('Keypoint at:', kp[0].pt)
print('Keypoint diameter:', kp[0].size)
print('Direction of gradient:', kp[0].angle)

In [None]:
# Number of keypoints currently held in `kp`
len(kp)

---

### Apply SURF to all images

In [None]:
# Processing time: ~3 minutes and 40 seconds.

# collect the paths of all training images below the current working directory
cwd = pathlib.Path.cwd()
train_data_dir = cwd.joinpath('data', 'train_images')
train_images = list(train_data_dir.glob('*.jpg'))

# Create SURF object. You can specify params here or later.
# Here I set Hessian Threshold to 400
surf = cv2.xfeatures2d.SURF_create(400)

# prepare dictionary to gather data (one list entry per image, kept aligned)
surf_images = {'keypoints': [],
               'ImageId': [],
               'NumberKP': []
              }

print('processing images...')
start = time.time()

for idx, image_path in enumerate(train_images):
    # read exactly the file that was globbed instead of re-deriving a
    # relative path from its name
    image = io.imread(str(image_path))
    # Find keypoints and descriptors directly
    kp, des = surf.detectAndCompute(image, None)

    # record the image only after it was processed successfully, so the
    # three lists can never get out of step
    surf_images['ImageId'].append(image_path.name)
    surf_images['keypoints'].append(kp)
    surf_images['NumberKP'].append(len(kp))
    if idx % 500 == 0 and idx != 0:
        print(f'image number {idx} processed...')

end = time.time()
print('processing done.')
print('required time:', end - start)

In [None]:
# Turn the collected lists into a data frame (columns: keypoints, ImageId, NumberKP)
temp = pd.DataFrame.from_dict(surf_images)
# Preview the first rows
temp.head()

In [None]:
# Number of images for which fewer than 50 keypoints were detected.
# `len` counts the matching rows directly; the former `.count()[0]` relied on
# positional indexing of a label-indexed Series.
print(f"We have {len(temp.query('NumberKP < 50'))} keypoint vectors with less than 50 keypoints")

In [None]:
# Show the images ordered by number of detected keypoints, largest first
# (the result is only displayed, `temp` itself is not changed)
temp.sort_values(by='NumberKP', ascending=False)

Adjust the data frame and eliminate images that have more than one defect.

In [None]:
df_complete = pd.read_csv('data/train_complete.csv')
# Count occurrences of `ImageId` in df: build the frequency table once and
# map it onto the rows (instead of recomputing it for every single row)
df_complete['count'] = df_complete['ImageId'].map(df_complete['ImageId'].value_counts())

In [None]:
# isolate `ImageIds` that occur more than once in `df_complete`, i.e. images
# with more than one defect (note: despite its name, `single_defects` holds
# the images that are to be removed)
single_defects = df_complete.query('count > 1').ImageId.to_numpy()

# get the row labels of `temp` that belong to those images
indices = temp.index[temp.ImageId.isin(single_defects)].tolist()

temp.drop(indices, inplace=True)

---

### Prepare data frame with (max) TOP50 Keypoints per image

In [None]:
def rank_keypoint(kp_dict, ranking_number):
    """Selects the keypoint that holds rank `ranking_number` when ordered by size.

    The keypoints are ordered from largest to smallest `size`; rank 1 is the largest.

    Input variables:
    kp_dict        - dictionary with entries `size` (of keypoint) and `keypoint` (object)
    ranking_number - position of ranked keypoint (by size), starting at 1

    Returns a pandas Series (the `keypoint` column) containing the matching entry,
    labelled with its 0-based position in the sorted order. The Series is empty
    if no entry has the requested rank.
    """
    by_size = (
        pd.DataFrame.from_dict(kp_dict)
        .sort_values(by='size', ascending=False)
        .reset_index(drop=True)
    )

    # ranks 1..n, row-aligned with the sorted frame
    n_keypoints = len(kp_dict['keypoint'])
    by_size['Ranking'] = np.arange(1, n_keypoints + 1, dtype=float)

    # keep only the row whose rank matches and hand back its keypoint
    return by_size.loc[by_size['Ranking'] == ranking_number, 'keypoint']


In [None]:
def build_keypoint_dict(keypoints):
    """Converts keypoint objects into a dictionary of two parallel lists.

    The result has the entries `size` (the `size` attribute of every keypoint)
    and `keypoint` (the keypoint objects themselves), both in input order.
    """
    points = list(keypoints)
    return {
        'size': [point.size for point in points],
        'keypoint': points,
    }

In [None]:
def get_ranked_keypoint(keypoints, ranking_number):
    """Returns a keypoint at position `ranking_number` and 0 if no keypoint exists for the ranking position.

    Keypoints are ranked by size, largest first (rank 1). Any rank that cannot be
    served - below 1, beyond the number of keypoints, or not matching a rank
    exactly - yields 0.

    Input variables:
    keypoints      - list of keypoint objects from SURF algorithm
    ranking_number - position of ranked keypoint (by size), starting at 1
    """
    # check, whether there can be a keypoint for the required `ranking_number`;
    # done before any other work so that impossible ranks return the
    # documented 0 instead of failing in the lookup below
    if not 1 <= ranking_number <= len(keypoints):
        return 0

    # convert keypoints into dictionary with added size
    kp_dict = build_keypoint_dict(keypoints)
    ranked_keypoint = rank_keypoint(kp_dict, ranking_number)

    # e.g. a non-integral rank matches no entry
    if ranked_keypoint.empty:
        return 0

    # the result holds exactly one entry; take it by position so that this
    # does not depend on how the entry is labelled
    return ranked_keypoint.iloc[0]

In [None]:
def get_keypoint_x(keypoints, ranking_number):
    """Looks up the x-coordinate of the keypoint at rank `ranking_number`.
    Yields 0 when there is no keypoint for `ranking_number`.

    Input variables:
    keypoints      - list of keypoint objects from SURF algorithm
    ranking_number - position of ranked keypoint (by size)
    """
    # nothing to rank at all
    if len(keypoints) == 0:
        return 0

    candidate = get_ranked_keypoint(keypoints, ranking_number)
    # a falsy result means that no keypoint holds this rank
    return candidate.pt[0] if candidate else 0

In [None]:
def get_keypoint_y(keypoints, ranking_number):
    """Looks up the y-coordinate of the keypoint at rank `ranking_number`.
    Yields 0 when there is no keypoint for `ranking_number`.

    Input variables:
    keypoints      - list of keypoint objects from SURF algorithm
    ranking_number - position of ranked keypoint (by size)
    """
    # nothing to rank at all
    if len(keypoints) == 0:
        return 0

    candidate = get_ranked_keypoint(keypoints, ranking_number)
    # a falsy result means that no keypoint holds this rank
    return candidate.pt[1] if candidate else 0

In [None]:
def get_keypoint_size(keypoints, ranking_number):
    """Looks up the size of the keypoint at rank `ranking_number`.
    Yields 0 when there is no keypoint for `ranking_number`.

    Input variables:
    keypoints      - list of keypoint objects from SURF algorithm
    ranking_number - position of ranked keypoint (by size)
    """
    # nothing to rank at all
    if len(keypoints) == 0:
        return 0

    candidate = get_ranked_keypoint(keypoints, ranking_number)
    # a falsy result means that no keypoint holds this rank
    return candidate.size if candidate else 0

In [None]:
def add_keypoint_parameters(df, max_rank=50):
    """Adds columns to df for keypoint parameters `x`, `y`, and `size` up to `max_rank`.

    For every rank `i` from 1 to `max_rank` the columns `kp_x_i`, `kp_y_i` and
    `kp_size_i` are appended (in this order), computed from the `keypoints`
    column of `df`. Progress and timing information is printed along the way.

    Input variables:
    df       - data frame with a `keypoints` column
    max_rank - highest rank to extract; values below 1 add no columns

    Returns a new data frame; the frame passed in is left unchanged.

    Run time for 50 keypoints: ~ 35 minutes
    """
    total_time = 0
    new_columns = []

    for rank in range(1, max_rank + 1):
        print('processing step ...', rank)
        start = time.time()

        # one Series per parameter, named after the column it becomes
        new_columns.append(
            df.keypoints.apply(lambda kps: get_keypoint_x(kps, rank)).rename(f'kp_x_{rank}'))
        new_columns.append(
            df.keypoints.apply(lambda kps: get_keypoint_y(kps, rank)).rename(f'kp_y_{rank}'))
        new_columns.append(
            df.keypoints.apply(lambda kps: get_keypoint_size(kps, rank)).rename(f'kp_size_{rank}'))

        end = time.time()
        total_time += (end - start)

        print('processing time:', end-start)

    # piece everything together in a single step instead of once per rank
    result = pd.concat([df, *new_columns], axis=1, ignore_index=False)

    print('total processing time was:', total_time)
    # an average only exists if at least one rank was processed
    if max_rank > 0:
        print('average processing time per rank:', total_time / max_rank)

    return result

In [None]:
# apply functions to data frame (default `max_rank`, i.e. up to 50 keypoints
# per image); `temp` is replaced by the extended frame
temp = add_keypoint_parameters(temp)

---

### Save data frame for further processing

In [None]:
# Write the data frame to CSV (comma separated, without the index column).
# NOTE(review): the `keypoints` column holds keypoint objects, which are
# presumably written as their string representation — confirm this is intended.
temp.to_csv('data/train_surf.csv', sep=',', index=False)

---

### Trials

In [None]:
# Timing experiment: extract x, y and size of the rank-1 keypoint for every
# row of `temp`, measuring each step separately.

# empty single-column frames that receive the results
x = pd.DataFrame(columns = ['x'])
y = pd.DataFrame(columns = ['y'])
s = pd.DataFrame(columns = ['s'])

total_time = 0

# x-coordinate (the lambda parameter `x` shadows the frame `x` only inside the lambda)
start = time.time()
x.x = temp.keypoints.apply(lambda x: get_keypoint_x(x, 1))
end = time.time()
print('processing time x:', end-start)
total_time += end - start

# y-coordinate
start = time.time()
y.y = temp.keypoints.apply(lambda x: get_keypoint_y(x, 1))
end = time.time()
print('processing time y:', end-start)
total_time += end - start

# size
start = time.time()
s.s = temp.keypoints.apply(lambda x: get_keypoint_size(x, 1))
end = time.time()
print('processing time s:', end-start)
total_time += end - start

# append the three result columns to `temp`
start = time.time()
temp = pd.concat([temp, x, y, s], axis=1, ignore_index=False)

end = time.time()
print('processing time concat:', end-start)
total_time += end - start
print('total processing time was:', total_time)
