In [None]:
• Geometric Image Transformation
• Affine Transformation
• Perspective Transformation
• Image Alignment
• Object Recognition

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# load image
# cv2.imread does not raise on a missing/unreadable file, it returns None,
# so fail here with a clear message instead of on img.shape below.
img = cv2.imread('Image/house.jpg')
if img is None:
    raise FileNotFoundError("Could not read image 'Image/house.jpg'")

# image size and transformation parameters
h, w = img.shape[:2]
tx, ty = w/2, h/2          # image centre, used as translation offsets in T
angle = np.radians(45)     # rotation angle in radians
scale = 0.8                # isotropic scale factor

# All three matrices are 3x3 so they can be chained on homogeneous
# coordinates (x, y, 1) with plain matrix multiplication.

# define rotation matrix (rotation about the origin)
R = np.array([
    [np.cos(angle), np.sin(angle), 0],
    [-np.sin(angle), np.cos(angle), 0],
    [0, 0, 1]
])
# define translation matrix (shift by (tx, ty))
T = np.array([
    [1, 0, tx],
    [0, 1, ty],
    [0, 0, 1]
])
# define scale matrix (same factor along x and y)
S = np.array([
    [scale, 0, 0],
    [0, scale, 0],
    [0, 0, 1]
])




In [26]:
# Rotation and scaling are defined about the origin of the coordinate system,
# so the image centre is first translated to the origin (inverse of T), then
# scaled and rotated, and finally translated back again (T).

# Re-derive the image size from img instead of trusting the kernel's current
# h and w: other cells in this notebook reassign those names, which would
# silently break this cell when cells are run out of order.
h, w = img.shape[:2]

# compute transformation matrix by multiplying the single transformations
A = T @ R @ S @ np.linalg.inv(T)

# Mapping direction:
#   False -> forward mapping: every source pixel is pushed through A; after
#            rounding some output pixels receive no value (gaps / aliasing).
#   True  -> inverse mapping: every output pixel is pulled from the source
#            through inv(A), which avoids those gaps.
USE_INVERSE_MAPPING = False

# define grid to represent image coordinates using np.indices
dim = (w, h)
print(dim)

coords1 = np.indices(dim)  # resulting shape: (2 (x,y), width, height)
print(coords1.shape)
coords = coords1.reshape(2, -1)  # resulting shape: (2, width*height)
print(coords.shape)

# homogeneous coordinates: append a row of ones after the last row.
# The ones use the grid's integer dtype so the stacked array stays integral
# and can be used directly for indexing (no np.int cast, which was removed
# from NumPy).
coords = np.vstack((coords, np.ones(coords.shape[1], dtype=coords.dtype)))  # shape: (3, width*height)
print(coords.shape)

# column indices (coords[0]) and row indices (coords[1]) of the regular grid
xcoord, ycoord = coords[0], coords[1]

# apply the transformation (A or its inverse) to the grid and round to pixels
M = np.linalg.inv(A) if USE_INVERSE_MAPPING else A
warp_coords = np.round(M @ coords).astype(np.int64)
xcoord2, ycoord2 = warp_coords[0, :], warp_coords[1, :]

# keep only grid points whose warped position lies within the image boundary
indices = np.where((xcoord2 >= 0) & (xcoord2 < w) & (ycoord2 >= 0) & (ycoord2 < h))

if USE_INVERSE_MAPPING:
    # grid points are output pixels, warped points are where to read from
    xpix2, ypix2 = xcoord[indices], ycoord[indices]
    xpix, ypix = xcoord2[indices], ycoord2[indices]
else:
    # grid points are source pixels, warped points are where to write to
    xpix2, ypix2 = xcoord2[indices], ycoord2[indices]
    xpix, ypix = xcoord[indices], ycoord[indices]

# map the pixel values to their new location in the output array
# (images are indexed [row, column] = [y, x])
output = np.zeros(img.shape, img.dtype)
output[ypix2, xpix2] = img[ypix, xpix]

# show the result; waitKey(0) blocks until a key is pressed in the window
cv2.imshow("output", output)

cv2.waitKey(0)
cv2.destroyAllWindows()



(640, 480)
(2, 640, 480)
(2, 307200)
(3, 307200)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  coords = coords.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  warp_coords = np.round(np.linalg.inv(A)@coords).astype(np.int)


In [33]:
# Affine transformation

# load image
# cv2.imread returns None (no exception) when the file cannot be read.
img = cv2.imread('Image/house.jpg')
if img is None:
    raise FileNotFoundError("Could not read image 'Image/house.jpg'")
height, width = img.shape[:2]
tx, ty = width/2, height/2   # rotation centre = image centre
angle = np.radians(45)
scale = 1.25

# composite transformation matrix (rotation + scaling about (tx, ty)),
# written out in closed form as a 2x3 affine matrix
sin_theta = np.sin(angle)
cos_theta = np.cos(angle)

a_11 = scale * cos_theta
a_21 = -scale * sin_theta
a_12 = scale * sin_theta
a_22 = scale * cos_theta
a_13 = tx * (1 - scale * cos_theta) - scale * sin_theta * ty
a_23 = ty * (1 - scale * cos_theta) + scale * sin_theta * tx
A_manual = np.array([[a_11, a_12, a_13], [a_21, a_22, a_23]])

# alternative: get the same transformation matrix from OpenCV
# (getRotationMatrix2D expects the angle in degrees)
A = cv2.getRotationMatrix2D((tx, ty), np.rad2deg(angle), scale)

# sanity check: the hand-derived matrix and OpenCV's matrix must agree
assert np.allclose(A, A_manual), "manual affine matrix differs from cv2.getRotationMatrix2D"

# transform image by OpenCV function cv2.warpAffine; output keeps the input size
output = cv2.warpAffine(img, A, (width, height))


# show images; waitKey(0) blocks until a key is pressed in a window
cv2.imshow("input", img)
cv2.imshow("output", output)


cv2.waitKey(0)
cv2.destroyAllWindows()


In [36]:
# Image alignment: warp img2 so that it lines up with img1 (the template)

# cv2.imread returns None (no exception) when a file cannot be read.
img1 = cv2.imread('Image/book1.jpg')
img2 = cv2.imread('Image/book2.jpg')
if img1 is None:
    raise FileNotFoundError("Could not read image 'Image/book1.jpg'")
if img2 is None:
    raise FileNotFoundError("Could not read image 'Image/book2.jpg'")

# find the keypoints and descriptors using ORB
orb = cv2.ORB_create(nfeatures=2000)
kp1, dsc1 = orb.detectAndCompute(img1, None)
kp2, dsc2 = orb.detectAndCompute(img2, None)

# get matches on the two images (Hamming distance for ORB's binary descriptors).
# Descriptors are None when no keypoints were found; treat that as "no matches".
bf = cv2.BFMatcher_create(cv2.NORM_HAMMING)
if dsc1 is None or dsc2 is None:
    matches = []
else:
    matches = bf.match(dsc1, dsc2)

# sort matches in the order of their distance and keep the 30 best
matches = sorted(matches, key=lambda x: x.distance)
good = matches[:30]

# draw matches
img_matches = cv2.drawMatches(img1,kp1,img2,kp2,good,None,matchColor=(0,255,0),singlePointColor=(255, 0, 255))

# we need at least four matches to find homography between the images
if len(good) >= 4:

    # extract location (keypoints) of good matches, shaped (N, 1, 2):
    # queryIdx indexes kp1 (img1), trainIdx indexes kp2 (img2)
    p1 = np.asarray([[kp1[match.queryIdx].pt] for match in good])
    p2 = np.asarray([[kp2[match.trainIdx].pt] for match in good])

    # find homography using RANSAC: it removes outliers (incorrect matches).
    # Points are passed as (p2, p1) so H maps img2 coordinates onto img1.
    H, status = cv2.findHomography(p2, p1, cv2.RANSAC)

    if H is None:
        # findHomography returns no matrix when estimation fails
        print("Error, homography could not be estimated\n")
    else:
        # Apply homography to warp perspective of the image to be aligned;
        # the output canvas has the size of the template image
        h, w = img1.shape[:2]
        output = cv2.warpPerspective(img2, H, (w, h))

        # Show images
        cv2.imshow("img1 (template)", img1)
        cv2.imshow("img2", img2)
        cv2.imshow("matches", img_matches)
        cv2.imshow("aligned image", output)

else:
    print("Error, not enough matches\n")


cv2.waitKey(0)
cv2.destroyAllWindows()


In [55]:
# Object recognition: locate the object from img1 inside the scene img2

# cv2.imread returns None (no exception) when a file cannot be read.
img1 = cv2.imread('Image/object.jpg')
img2 = cv2.imread('Image/objectScene.jpg')
if img1 is None:
    raise FileNotFoundError("Could not read image 'Image/object.jpg'")
if img2 is None:
    raise FileNotFoundError("Could not read image 'Image/objectScene.jpg'")

# find the keypoints and descriptors using ORB
orb = cv2.ORB_create()
kp1, descriptors_1 = orb.detectAndCompute(img1, None)
kp2, descriptors_2 = orb.detectAndCompute(img2, None)


# get matches on the two images (cross-checked Hamming matching).
# Descriptors are None when no keypoints were found; treat that as "no matches".
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
if descriptors_1 is None or descriptors_2 is None:
    bf_matches = []
else:
    bf_matches = bf.match(descriptors_1, descriptors_2)

# sort matches in the order of their distance and keep the 30 best
bf_matches = sorted(bf_matches, key=lambda x: x.distance)
good = bf_matches[:30]

# Optional: draw matches
# img_matches = cv2.drawMatches(img1, kp1, img2, kp2, good, None, matchColor=(255, 255, 0), singlePointColor=(255, 0, 255))

# we need at least four matches to find homography between the images
if len(good) >= 4:

    # extract location (keypoints) of good matches, shaped (N, 1, 2):
    # queryIdx indexes kp1 (object image), trainIdx indexes kp2 (scene image)
    p1 = np.asarray([[kp1[match.queryIdx].pt] for match in good])
    p2 = np.asarray([[kp2[match.trainIdx].pt] for match in good])

    # find homography using RANSAC (reprojection threshold 5.0);
    # points are passed as (p1, p2) so H maps object coordinates into the scene
    H, status = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)

    if H is None:
        # findHomography returns no matrix when estimation fails
        print("Error: homography could not be estimated\n")
    else:
        # four corner coordinates of the reference (object) image
        height, width = img1.shape[:2]
        corners_src = np.float32([[0, 0], [0, height-1], [width-1, height-1], [width-1, 0]]).reshape(-1, 1, 2)

        # the 'detected' object in the 'scene' image:
        # apply the homography to the corner coordinates corners_src
        corners_dst = cv2.perspectiveTransform(corners_src, H)

        # draw the outline of the detected object on a copy of the scene
        output = img2.copy()
        cv2.polylines(output, [np.int32(corners_dst)], True, (0, 255, 255), 10)

        # display images
        cv2.imshow("object Image", img1)
        cv2.imshow("scene with object", img2)
        # cv2.imshow("matches", img_matches)
        cv2.imshow("resulted detected object", output)

else:
    print("Error: not enough matches\n")

cv2.waitKey(0)
cv2.destroyAllWindows()




