# Image recognition 101

## Working with images

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt

In [None]:
# All input pictures used by this notebook live under media/input.
INPUT_BASE_PATH = os.path.join('media', 'input')
(
    GREG_IMAGE_PATH,
    SUDOKU_IMAGE_PATH,
    MEMESFUNNY_IMAGE_PATH,
    LOGO_IMAGE_PATH,
    LOGO_MULTIPLE_SCREENSHOT_IMAGE_PATH,
) = (
    os.path.join(INPUT_BASE_PATH, file_name)
    for file_name in (
        'greg.png',
        'sudoku.jpg',
        'memesfunny.png',
        'logo.png',
        'logo_multiple_screenshot.png',
    )
)

In [None]:
def display_image(image):
    """Show an OpenCV image in the notebook with the axes hidden.

    Colour input is treated as BGR (OpenCV's channel order) and converted to
    RGB before plotting. A 2-D, single-channel array - e.g. an image read
    with ``cv2.IMREAD_GRAYSCALE`` - is drawn with a grey colormap instead,
    because ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`` rejects such input.

    Every call draws on its own 150 dpi figure, so the picture never lands on
    a figure left over from an earlier cell and the global ``plt.rcParams``
    are left untouched. The figure is rendered by whichever matplotlib
    backend is currently active.

    Args:
        image: numpy array as returned by ``cv2.imread``.
    """
    figure, axes = plt.subplots(dpi=150)
    if image.ndim == 2:
        # Single channel: there is no BGR order to swap.
        axes.imshow(image, cmap='gray', vmin=0, vmax=255)
    else:
        axes.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes.axis('off')
    plt.show()
    
    
def display_greyscale_image(image):
    """Show a single-channel image in the notebook using a grey colormap.

    Values are mapped onto the fixed range 0..255 (``vmin``/``vmax``), so
    anything below 0 is drawn black and anything above 255 is drawn white.

    Every call draws on its own 150 dpi figure, so the picture never lands on
    a figure left over from an earlier cell and the global ``plt.rcParams``
    are left untouched. The figure is rendered by whichever matplotlib
    backend is currently active.

    Args:
        image: 2-D numpy array of pixel intensities.
    """
    figure, axes = plt.subplots(dpi=150)
    axes.imshow(image, cmap='gray', vmin=0, vmax=255)
    axes.axis('off')
    plt.show()

In [None]:
color_image = cv2.imread(GREG_IMAGE_PATH, cv2.IMREAD_COLOR)
# cv2.imread signals a missing or unreadable file by returning None, not by raising.
if color_image is None:
    raise FileNotFoundError(f'Could not read image: {GREG_IMAGE_PATH}')
# Only the last expression of a cell is echoed, so the shape is printed explicitly.
print(color_image.shape)
display_image(color_image)

In [None]:
# Channel values of the pixel in the first row and first column.
color_image[0, 0]

#### Exercise 1

OpenCV constants for the exercise:
+ IMREAD_COLOR
+ IMREAD_GRAYSCALE
+ IMREAD_UNCHANGED
+ IMREAD_REDUCED_GRAYSCALE_2
+ IMREAD_REDUCED_COLOR_2
+ IMREAD_REDUCED_GRAYSCALE_4
+ IMREAD_REDUCED_COLOR_4
+ IMREAD_REDUCED_GRAYSCALE_8
+ IMREAD_REDUCED_COLOR_8

Try to read the greg image with the different input flags listed above.
Take a look at the results using the next cell.
Don't forget to check the shape of your newly read image and the value of a specific pixel.

In [None]:
# your code goes here
# (the line below displays `input_image`, so bind the image you read to that name)
display_image(input_image)

### Blurring

In [None]:
# Blur the colour image with a 10x10 kernel and show the result.
image = cv2.imread(GREG_IMAGE_PATH, flags=cv2.IMREAD_COLOR)
blurred_image_10 = cv2.blur(src=image, ksize=(10, 10))
display_image(blurred_image_10)

#### Exercise 2
Try out different kernel values for `blur` function.

### Edge detection

In [None]:
image = cv2.imread(GREG_IMAGE_PATH, cv2.IMREAD_GRAYSCALE)
# 64-bit float output depth is requested for the Laplacian response.
laplacian = cv2.Laplacian(image, ddepth=cv2.CV_64F)
# Push every response above 10 up to 255; all other values are kept as they are.
enhanced_laplacian = np.where(laplacian > 10, 255, laplacian)
display_greyscale_image(enhanced_laplacian)

#### Exercise 3
In the next cell, read the image of the sudoku puzzle and try to detect edges using the Laplacian method.

Compare it with the Sobel operator using the following function:

`cv2.Sobel(input_image, cv2.CV_64F, 1, 0, ksize=ksize_parameter)`

Play around with `ksize` parameter for better results.

**Remember that the `ksize` parameter can only be an odd number in the range -1 to 31**

In [None]:
# The sudoku picture is loaded as a single-channel greyscale image.
sudoku_image = cv2.imread(SUDOKU_IMAGE_PATH, cv2.IMREAD_GRAYSCALE)
# your code goes here
# (the line below displays `edge_detection_image`, so bind your edge-detection result to that name)
display_greyscale_image(edge_detection_image)

## Feature detection

### Keypoints detection using SIFT algorithm

In [None]:
image = cv2.imread(GREG_IMAGE_PATH, cv2.IMREAD_COLOR)
# Newer OpenCV builds expose SIFT directly as cv2.SIFT_create; older ones only
# provide it through the contrib module cv2.xfeatures2d. Use whichever exists.
sift = (
    cv2.SIFT_create()
    if hasattr(cv2, 'SIFT_create')
    else cv2.xfeatures2d.SIFT_create()
)
keypoints = sift.detect(image, None)
# Passing None as the output image lets drawKeypoints return a fresh image.
image_with_keypoints = cv2.drawKeypoints(
    image,
    keypoints,
    outImage=None,
    flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT,
)
display_image(image_with_keypoints)

### Keypoints detection using ORB algorithm

In [None]:
# Same picture as in the SIFT cell (`image`), this time with the ORB detector.
orb = cv2.ORB_create()
keypoints = orb.detect(image, None)
image_with_keypoints = cv2.drawKeypoints(
    image,
    keypoints,
    outImage=None,
    flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT,
)
display_image(image_with_keypoints)

#### Exercise 4

Flags for drawKeypoints:
+ DRAW_MATCHES_FLAGS_DEFAULT *only center point*
+ DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS *center point with keypoint size and orientation*
+ DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS *single keypoints will not be drawn*

Observe how the resulting image changes when you use different flags for the `drawKeypoints` function.

Try to analyze the characteristic features of the keypoints that were selected by the algorithm.

In [None]:
# your code goes here
# (the line below displays `image_with_keypoints_with_different_flags`, so bind your drawKeypoints result to that name)
display_image(image_with_keypoints_with_different_flags)

## Feature matching

In [None]:
greg_image = cv2.imread(GREG_IMAGE_PATH)
memesfunny_image = cv2.imread(MEMESFUNNY_IMAGE_PATH)

# Newer OpenCV builds expose SIFT directly as cv2.SIFT_create; older ones only
# provide it through the contrib module cv2.xfeatures2d. Use whichever exists.
sift = (
    cv2.SIFT_create()
    if hasattr(cv2, 'SIFT_create')
    else cv2.xfeatures2d.SIFT_create()
)
# detectAndCompute returns the keypoints together with one descriptor per keypoint.
greg_keypoints, greg_descriptors = sift.detectAndCompute(greg_image, None)
memesfunny_keypoints, memesfunny_descriptors = sift.detectAndCompute(memesfunny_image, None)

#### Exercise 5
Display keypoints found for memesfunny image using `drawKeypoints` function.

In [None]:
# your code goes here
# (the line below displays `memesfunny_image_with_keypoints`, so bind your drawKeypoints result to that name)
display_image(memesfunny_image_with_keypoints)

### Match

In [None]:
# Brute-force matcher with its default settings; greg descriptors are the
# query set and memesfunny descriptors the train set.
bf = cv2.BFMatcher()
matches = bf.match(
    queryDescriptors=greg_descriptors,
    trainDescriptors=memesfunny_descriptors,
)

first_match = matches[0]

# One value per line: distance, index into the query set, index into the train set.
print(
    first_match.distance,
    first_match.queryIdx,
    first_match.trainIdx,
    sep='\n',
)

### Displaying matches

In [None]:
# Smallest distance first, then draw only the ten closest matches.
matches = sorted(matches, key=lambda found_match: found_match.distance)
matches_image = cv2.drawMatches(
    greg_image,
    greg_keypoints,
    memesfunny_image,
    memesfunny_keypoints,
    matches[:10],
    outImg=None,
    flags=cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,  # same value as the literal 2
)
display_image(matches_image)

### Knn matches

In [None]:
# k=2: for every greg descriptor ask for its two closest memesfunny descriptors.
matches = bf.knnMatch(
    queryDescriptors=greg_descriptors,
    trainDescriptors=memesfunny_descriptors,
    k=2,
)
# Unpack the pair found for the first query descriptor.
match, nearest_neighbour_match = next(iter(matches))

#### Exercise 6
Display distance attribute for both `match` and `nearest_neighbour_match`.

Compare those values. What do they tell us about specific match?

In [None]:
# your code goes here

### Lowe's ratio

In [None]:
lowe_ratio = 0.75

# Keep a best match only when it is clearly closer than the runner-up,
# i.e. its distance is below `lowe_ratio` times the second-best distance.
good_matches = [
    best_match
    for best_match, second_best_match in matches
    if best_match.distance < lowe_ratio * second_best_match.distance
]

# drawMatchesKnn expects a list of match lists, hence the one-element wrappers.
knn_matches_image = cv2.drawMatchesKnn(
    greg_image,
    greg_keypoints,
    memesfunny_image,
    memesfunny_keypoints,
    [[good_match] for good_match in good_matches],
    outImg=None,
    flags=cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,  # same value as the literal 2
)
display_image(knn_matches_image)

#### Exercise 7
Play around with `lowe_ratio` value.

How do those changes influence the number of good matches found?

## Homography

Change `lowe_ratio` to 0.75 again and rerun previous cell

### Display found object

In [None]:
# Coordinates of the matched keypoints in both images, reshaped to (N, 1, 2).
greg_points = np.float32(
    [greg_keypoints[match.queryIdx].pt for match in good_matches]
).reshape(-1, 1, 2)
memesfunny_points = np.float32(
    [memesfunny_keypoints[match.trainIdx].pt for match in good_matches]
).reshape(-1, 1, 2)

# RANSAC with a reprojection threshold of 5.0; the returned mask holds one
# entry per input match.
transformation_matrix, matches_mask = cv2.findHomography(
    greg_points, memesfunny_points, cv2.RANSAC, 5.0
)
if transformation_matrix is None:
    # Fail with a clear message instead of an AttributeError further down.
    raise ValueError(
        'No homography could be estimated from the current good_matches'
    )
matches_mask = matches_mask.ravel().tolist()

# Corners of the greg image, projected into the memesfunny image.
height, width = greg_image.shape[:2]
points = np.float32([
    [0, 0],
    [0, height - 1],
    [width - 1, height - 1],
    [width - 1, 0],
]).reshape(-1, 1, 2)
transformed_points = cv2.perspectiveTransform(
    points, transformation_matrix
)

# Draw on a copy: cv2.polylines paints onto the array it is given, so drawing
# on memesfunny_image itself would stack a new border on every re-run of this
# cell (e.g. after changing the colour).
homography_image = cv2.polylines(
    memesfunny_image.copy(),
    [np.int32(transformed_points)],
    True,
    (0, 255, 0), # draw border in green color
    3,
    cv2.LINE_AA,
)
display_image(homography_image)


#### Exercise 8
Change params of polylines function above to display border of matched area in red.

**Remember that colors in OpenCV are represented in *BGR* color space**

### Display matches

In [None]:
# Drawing options: green match lines, and only the matches whose entry in
# `matches_mask` is set are drawn.
draw_params = {
    'matchColor': (0, 255, 0),
    'singlePointColor': None,
    'matchesMask': matches_mask,
    'flags': cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,  # same value as the literal 2
}

# The right-hand picture is the one that already carries the homography border.
homography_image_with_matches = cv2.drawMatches(
    greg_image, greg_keypoints,
    homography_image, memesfunny_keypoints,
    good_matches,
    None,
    **draw_params,
)
display_image(homography_image_with_matches)

#### Exercise 9
Change the value of the `matchesMask` parameter to `[number % 10 == 0 for number in range(len(matches_mask))]`.

What changed? What do you think is the reason for using a mask for matches?

## Flann Based Matcher

In [None]:
# Algorithm id passed to FLANN's index parameters. In OpenCV's FLANN enum the
# KD-tree index is 1; 0 selects the linear (brute-force) index instead.
FLANN_INDEX_KDTREE = 1
# Number of neighbours requested per descriptor from knnMatch.
KNN_MATCH_NEAREST_NEIGHBOURS_NUMBER = 2

In [None]:
# Parameter dictionaries handed to the FLANN matcher: index construction first,
# search settings second.
index_params = {'algorithm': FLANN_INDEX_KDTREE, 'trees': 5}
search_params = {'checks': 50}

In [None]:
# FLANN-based matcher configured with the index and search parameters defined above.
flann = cv2.FlannBasedMatcher(
    indexParams=index_params,
    searchParams=search_params,
)

## Multiple Images Matching

In [None]:
from sklearn.cluster import MeanShift, estimate_bandwidth

In [None]:
# Source: the single logo. Target: the screenshot in which it is searched for.
source_image = cv2.imread(LOGO_IMAGE_PATH)
target_image = cv2.imread(LOGO_MULTIPLE_SCREENSHOT_IMAGE_PATH)

# Reuses the `sift` detector created earlier in the notebook.
keypoints1, descriptors1 = sift.detectAndCompute(source_image, mask=None)
keypoints2, descriptors2 = sift.detectAndCompute(target_image, mask=None)

In [None]:
# The `.pt` position of every target keypoint, one row per keypoint.
keypoints2_coordinates = np.array([keypoint.pt for keypoint in keypoints2])
# Wrapped in an array so a whole cluster can later be selected with an index array.
keypoints2 = np.array(keypoints2)

### Clustering

In [None]:
BANDWIDTH_QUANTILE = 0.1
NUMBER_OF_SAMPLES = 500

# Bandwidth for mean shift, estimated from the target keypoint coordinates.
bandwidth = estimate_bandwidth(
    keypoints2_coordinates,
    quantile=BANDWIDTH_QUANTILE,
    n_samples=NUMBER_OF_SAMPLES,
)

# fit() returns the estimator itself, so construction and fitting are chained.
mean_shift = MeanShift(
    bandwidth=bandwidth,
    bin_seeding=True,
    cluster_all=True,
).fit(keypoints2_coordinates)

# One label per target keypoint, plus the distinct labels that occur.
mean_shift_labels = mean_shift.labels_
clusters_labels = np.unique(mean_shift_labels)

In [None]:
# The ratio is the same for every cluster, so it is set once outside the loop.
lowe_ratio = 0.75

# For each cluster: ((source keypoints, cluster keypoints), good matches).
cluster_matches = []
for cluster_label in clusters_labels:
    cluster_points_indices, = np.where(mean_shift_labels == cluster_label)
    # The ratio test needs two neighbours per query descriptor; a cluster with
    # fewer descriptors than that cannot provide them, so it is skipped.
    if len(cluster_points_indices) < KNN_MATCH_NEAREST_NEIGHBOURS_NUMBER:
        continue
    keypoints2_cluster = keypoints2[cluster_points_indices]
    descriptors2_cluster = descriptors2[cluster_points_indices]

    matches = flann.knnMatch(
        descriptors1, descriptors2_cluster, k=KNN_MATCH_NEAREST_NEIGHBOURS_NUMBER
    )
    # Keep a best match only when it is clearly closer than the runner-up.
    # Entries that came back with fewer than two neighbours are ignored
    # instead of breaking the unpacking.
    good_matches = [
        neighbours[0]
        for neighbours in matches
        if len(neighbours) >= 2
        and neighbours[0].distance < lowe_ratio * neighbours[1].distance
    ]

    cluster_matches.append(((keypoints1, keypoints2_cluster), good_matches))

In [None]:
MIN_MATCH_COUNT = 10
# Draw on a copy: cv2.polylines paints onto the array it is given, so drawing
# on target_image itself would keep the borders of earlier runs when this cell
# is executed again.
output_image = target_image.copy()

# Corners of the source image; identical for every cluster, so built once.
height, width = source_image.shape[:2]
points = np.float32([
    [0, 0],
    [0, height - 1],
    [width - 1, height - 1],
    [width - 1, 0],
]).reshape(-1, 1, 2)

for (cluster_keypoints1, cluster_keypoints2), matches in cluster_matches:
    # Clusters with too few good matches are not considered a detection.
    if len(matches) < MIN_MATCH_COUNT:
        continue

    points1 = np.float32(
        [cluster_keypoints1[match.queryIdx].pt for match in matches]
    ).reshape(-1, 1, 2)
    points2 = np.float32(
        [cluster_keypoints2[match.trainIdx].pt for match in matches]
    ).reshape(-1, 1, 2)

    # The mask is not needed here; it gets its own name so that the
    # `matches_mask` list built in the homography section is not overwritten.
    transformation_matrix, _cluster_mask = cv2.findHomography(
        points1, points2, cv2.RANSAC, 5.0
    )
    if transformation_matrix is None:
        continue

    transformed_points = cv2.perspectiveTransform(
        points, transformation_matrix
    )

    # The scalar colour 255 only sets the first channel, which is blue in BGR order.
    output_image = cv2.polylines(
        output_image, [np.int32(transformed_points)], True, 255, 3, cv2.LINE_AA
    )
display_image(output_image)

#### Exercise 10
Play around with `BANDWIDTH_QUANTILE` constant.

Try to match as many lion logos as possible.