## Getting started with images

In [None]:
import cv2 as cv
import sys

# Read the sample image; cv.imread returns None (instead of raising) when it fails
img = cv.imread(cv.samples.findFile('starry_night.png'))

if img is None:
    sys.exit('Could not read image.')

# first argument is the window title
cv.imshow("Display image", img)

# the argument specifies the time, in milliseconds, the program should wait for a key press
# 0 means wait forever
k = cv.waitKey(0)

# write the image to disk if the 's' key was pressed
if k == ord('s'):
    cv.imwrite('starry_night.png', img)

# close the display window once the key press has been handled, as the other cells do
cv.destroyAllWindows()

## Getting and Saving Video from camera

In [None]:
import numpy as np
import cv2 as cv

# Capture frames from a camera, show them in grayscale and save a flipped copy to 'output.avi'.

# the argument specifies the camera number
# to play a video file, pass the file name instead of 0
cap = cv.VideoCapture(0)
if not cap.isOpened():
    # raise SystemExit rather than calling exit(), which is intended for interactive shells
    raise SystemExit("Cannot open camera")

# Size the writer from the capture so written frames match it;
# fall back to 640x480 if the backend reports 0 for a dimension
frame_w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) or 640
frame_h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) or 480

# define the codec and create the VideoWriter object
# fourcc is a 4-byte code used to specify the video codec
fourcc = cv.VideoWriter_fourcc(*'XVID')
out = cv.VideoWriter('output.avi', fourcc, 20.0, (frame_w, frame_h))

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # if the frame is read correctly, ret is True
        if not ret:
            print("Can't receive frame..Exiting")
            break

        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        # Display the resulting frame
        cv.imshow('frame', gray)

        gray = cv.flip(gray, 0)
        # The writer was opened as a colour writer (the default), so it expects 3-channel frames:
        # expand the flipped grayscale frame back to BGR before writing it
        out.write(cv.cvtColor(gray, cv.COLOR_GRAY2BGR))

        if cv.waitKey(1) == ord('q'):
            break
finally:
    # when everything is done (or something failed), release all resources
    cap.release()
    out.release()
    cv.destroyAllWindows()

## Drawing functions in OpenCV

In [None]:
# Start from an all-black 512x512 canvas with three 8-bit channels
img = np.zeros(shape=(512, 512, 3), dtype=np.uint8)

# Diagonal blue line (blue is (255, 0, 0) in BGR), 5 px thick, from corner to corner
line_start, line_end = (0, 0), (511, 511)
cv.line(img, line_start, line_end, (255, 0, 0), 5)

In [None]:
# Circle given by its centre and radius; colour is red in BGR, thickness is passed as -1
circle_centre, circle_radius = (447, 63), 63
cv.circle(img, circle_centre, circle_radius, (0, 0, 255), -1)

In [None]:
# Green rectangle, 3 px thick, described by two opposite corners
rect_corner_a, rect_corner_b = (384, 0), (510, 128)
cv.rectangle(img, rect_corner_a, rect_corner_b, (0, 255, 0), 3)

In [None]:
# Ellipse arguments after the image: centre, axes lengths, angle of rotation (anti-clockwise),
# start angle, end angle, colour and thickness
ellipse_centre, ellipse_axes = (256, 256), (100, 50)
cv.ellipse(img, ellipse_centre, ellipse_axes, 0, 0, 180, 255, -1)

In [None]:
# Polygon: list the vertex coordinates, then reshape them to ROWSx1x2
# (ROWS being the number of vertices) before handing them to polylines
pts = np.array([[10, 5], [20, 30], [70, 20], [50, 10]], dtype=np.int32).reshape(-1, 1, 2)
cv.polylines(img, [pts], True, (0, 255, 255))

In [None]:
# Write the text 'OpenCV' in white onto the image
font = cv.FONT_HERSHEY_SIMPLEX
text_origin = (10, 500)
cv.putText(img, 'OpenCV', text_origin, font, 4, (255, 255, 255), 2, cv.LINE_AA)

In [None]:
# Keep (re)displaying the image until the 'q' key is pressed, then close the window
quit_key = ord('q')
pressed_key = None
while pressed_key != quit_key:
    cv.imshow('image', img)
    pressed_key = cv.waitKey(0)
cv.destroyAllWindows()

## Basic Operations on Images

In [None]:
# Load the picture as a colour image (the default read mode, spelled out explicitly)
img = cv.imread('starry_night.png', cv.IMREAD_COLOR)

In [None]:
# Read the pixel at row 100, column 100; indexing with two coordinates yields all of its channel values
px = img[100,100]
print(px)

In [None]:
# Blue is channel 0, so pick index 0 of the pixel at (100, 100)
blue = img[100, 100][0]
print(blue)

In [None]:
# Overwrite the pixel at (100, 100) with 255 in every channel and print it back
img[100, 100] = (255, 255, 255)
print(img[100, 100])

In [None]:
# Show the modified image; stay in the loop until 'q' is pressed, then close the window
key_q = ord('q')
last_key = None
while last_key != key_q:
    cv.imshow('image', img)
    last_key = cv.waitKey(0)
cv.destroyAllWindows()

In [None]:
# ndarray.item() returns a single element as a plain Python scalar
# here: the red value (channel 2) of the pixel at (10, 10)
img.item((10, 10, 2))

In [None]:
# Set the red value (channel 2) of the pixel at (10, 10) and read it back.
# ndarray.itemset() was removed in NumPy 2.0; plain index assignment works on every NumPy version.
img[10, 10, 2] = 100
img.item(10, 10, 2)

In [None]:
# Array dimensions; for a colour image these unpack as (rows, columns, channels)
img.shape

In [None]:
# total number of array elements (rows * columns * channels for a colour image, not just the pixel count)
img.size

In [None]:
# Image ROI: take a 60x60 region and paste it at another location of the same image
src_rows, src_cols = slice(280, 340), slice(330, 390)
dst_rows, dst_cols = slice(273, 333), slice(100, 160)
part = img[src_rows, src_cols]
img[dst_rows, dst_cols] = part

In [None]:
# Split the image into its three channel planes and merge them back together
bgr_planes = cv.split(img)
b, g, r = bgr_planes
img = cv.merge([b, g, r])

In [None]:
# NumPy indexing gives the same blue plane (channel 0) without calling cv.split
b = img[..., 0]

In [None]:
# Zero the red channel (index 2) of every pixel with a single NumPy assignment
img[..., 2] = 0

In [None]:
# Making borders for images
import matplotlib.pyplot as plt

BLUE = [255, 0, 0]  # blue in OpenCV's BGR channel order
img1 = cv.imread('starry_night.png')

# the numbers are the top, bottom, left and right border widths respectively
replicate = cv.copyMakeBorder(img1,10,10,10,10,cv.BORDER_REPLICATE)
reflect = cv.copyMakeBorder(img1,10,10,10,10,cv.BORDER_REFLECT)
reflect101 = cv.copyMakeBorder(img1,10,10,10,10,cv.BORDER_REFLECT_101)
wrap = cv.copyMakeBorder(img1,10,10,10,10,cv.BORDER_WRAP)
constant= cv.copyMakeBorder(img1,10,10,10,10,cv.BORDER_CONSTANT,value=BLUE)

# OpenCV keeps colour images in BGR order while matplotlib expects RGB, so convert
# for display only; otherwise the BLUE border (and the picture itself) shows red/blue swapped.
# No colormap is passed because matplotlib ignores it for 3-channel images.
border_panels = [
    ('ORIGINAL', img1),
    ('REPLICATE', replicate),
    ('REFLECT', reflect),
    ('REFLECT_101', reflect101),
    ('WRAP', wrap),
    ('CONSTANT', constant),
]
for panel_idx, (panel_title, panel_img) in enumerate(border_panels, start=1):
    plt.subplot(2, 3, panel_idx)
    plt.imshow(cv.cvtColor(panel_img, cv.COLOR_BGR2RGB))
    plt.title(panel_title)
plt.show()

## Arithmetic on images

In [None]:
import cv2 as cv
import numpy as np

# Two one-element uint8 arrays whose plain sum (260) does not fit into 8 bits
x = np.array([250], dtype=np.uint8)
y = np.array([10], dtype=np.uint8)

# print what cv.add produces for them
print(cv.add(x, y))

### Blending of images

In [None]:
# Blend two images as a weighted sum and show the result.
img1 = cv.imread('starry_night.png')
img2 = cv.imread('opencv_logo.png')
if img1 is None or img2 is None:
    raise FileNotFoundError("Could not read 'starry_night.png' and/or 'opencv_logo.png'")

# cv.addWeighted requires both inputs to have the same size, so bring img2 to img1's size first.
# cv.resize takes the target size as (width, height), i.e. (columns, rows).
if img2.shape[:2] != img1.shape[:2]:
    img2 = cv.resize(img2, (img1.shape[1], img1.shape[0]))

# dst = 0.7 * img1 + 0.3 * img2 + 0
dst = cv.addWeighted(img1, 0.7, img2, 0.3, 0)

cv.imshow('dst', dst)
cv.waitKey(0)
cv.destroyAllWindows()

### Bitwise operations

In [None]:
# Load two images: img1 is the background picture, img2 is the logo pasted onto it
# (the two file names were swapped before, so the ROI was cut from the logo using the
# background's size and no longer matched the mask)
img1 = cv.imread('starry_night.png')
img2 = cv.imread('opencv_logo.png')
# I want to put the logo on the top-left corner, so I create a ROI of the logo's size
# (assumes the logo is no larger than the background image)
rows,cols,channels = img2.shape
roi = img1[0:rows, 0:cols]

# Now create a mask of the logo and also its inverse mask
img2gray = cv.cvtColor(img2,cv.COLOR_BGR2GRAY)
# pixels brighter than the threshold (10) are set to 255 (max); all others are set to 0
ret, mask = cv.threshold(img2gray, 10, 255, cv.THRESH_BINARY)
mask_inv = cv.bitwise_not(mask)
# Now black-out the area of the logo in the ROI
img1_bg = cv.bitwise_and(roi,roi,mask = mask_inv)
# Take only the region of the logo from the logo image
img2_fg = cv.bitwise_and(img2,img2,mask = mask)
# Put the logo in the ROI and modify the main image
dst = cv.add(img1_bg,img2_fg)
img1[0:rows, 0:cols ] = dst
cv.imshow('res',img1)
cv.waitKey(0)
cv.destroyAllWindows()

## Image Processing

### Object Tracking

In [None]:
import cv2 as cv
import numpy as np

# Show the camera feed together with a mask of the blue pixels and the masked result
# until the Esc key (code 27) is pressed.
cap = cv.VideoCapture(0)

# the HSV range for the blue colour never changes, so define it once outside the loop
lower_blue = np.array([110, 50, 50])
upper_blue = np.array([130, 255, 255])

try:
    while True:
        # 'ret' is False when no frame could be read; 'frame' is then unusable
        # (the result is not stored in '_', which IPython uses for the last cell output)
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame..Exiting")
            break

        # convert bgr to hsv
        hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)

        # threshold the hsv image to get only the blue colour
        mask = cv.inRange(hsv, lower_blue, upper_blue)

        # bitwise-and the original image with itself, restricted to the mask
        res = cv.bitwise_and(frame, frame, mask = mask)

        cv.imshow('frame', frame)
        cv.imshow('mask', mask)
        cv.imshow('res', res)

        k = cv.waitKey(5) & 0xFF
        if k == 27:
            break
finally:
    # free the camera as well as the windows, even if an error interrupted the loop
    cap.release()
    cv.destroyAllWindows()

In [None]:
# how to find hsv values to track?
import numpy as np
import cv2 as cv
# Build a 1x1 BGR image holding pure green and convert it to see its HSV values
green = np.array([[[0, 255, 0]]], dtype=np.uint8)
hsv_green = cv.cvtColor(green, cv.COLOR_BGR2HSV)
print(hsv_green)

### Geometric transformation on images

Preferable interpolation methods are cv.INTER_AREA for shrinking and cv.INTER_LINEAR & cv.INTER_CUBIC for zooming

In [None]:
# scaling: enlarge the image by a factor of two in both directions

img = cv.imread('opencv_logo.png')
scale_factor = 2

# variant 1: give the scale factors and let OpenCV derive the output size
res = cv.resize(img, None, fx = scale_factor, fy = scale_factor, interpolation = cv.INTER_CUBIC)

# variant 2: give the output size explicitly as (width, height)
height, width = img.shape[0], img.shape[1]
scaled_size = (scale_factor * width, scale_factor * height)
res = cv.resize(img, scaled_size, interpolation = cv.INTER_CUBIC)

In [None]:
# translation: shift the image by (100, 50)

img = cv.imread('starry_night.png')
rows, cols = img.shape[0], img.shape[1]

# 2x3 translation matrix [[1, 0, tx], [0, 1, ty]]
shift_x, shift_y = 100, 50
M = np.array([[1, 0, shift_x], [0, 1, shift_y]], dtype=np.float32)

# warpAffine takes the output size as (width, height), i.e. (columns, rows)
output_size = (cols, rows)
dst = cv.warpAffine(img, M, output_size)

cv.imshow('dst', dst)
cv.waitKey(0)
cv.destroyAllWindows()

In [None]:
# rotation: turn the image by 90 degrees about its centre, without scaling

img = cv.imread('starry_night.png')
rows, cols = img.shape[0], img.shape[1]

# cols-1 and rows-1 are the coordinate limits, so their halves give the image centre
rotation_centre = ((cols - 1) / 2.0, (rows - 1) / 2.0)
M = cv.getRotationMatrix2D(rotation_centre, 90, 1)
dst = cv.warpAffine(img, M, (cols, rows))

cv.imshow('dst', dst)
cv.waitKey(0)
cv.destroyAllWindows()

In [None]:
# affine transformation - parallel lines stay parallel after the transform
import matplotlib.pyplot as plt

# three points of the input image and the positions they should take in the output image
pts1 = np.array([[50, 50], [200, 50], [50, 200]], dtype=np.float32)
pts2 = np.array([[10, 100], [200, 50], [100, 250]], dtype=np.float32)

# derive the affine matrix from the point pairs and apply it
M = cv.getAffineTransform(pts1, pts2)
dst = cv.warpAffine(img, M, (cols, rows))

for plot_pos, plot_img, plot_title in ((121, img, 'Input'), (122, dst, 'Output')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
plt.show()

In [None]:
# perspective transformation

# four points of the input image and their target positions in the output image;
# no three of the four points should be collinear
pts1 = np.array([[56, 65], [368, 52], [28, 387], [389, 390]], dtype=np.float32)
pts2 = np.array([[0, 0], [300, 0], [0, 300], [300, 300]], dtype=np.float32)

# derive the perspective matrix from the point pairs and apply it
M = cv.getPerspectiveTransform(pts1, pts2)
dst = cv.warpPerspective(img, M, (cols, rows))

for plot_pos, plot_img, plot_title in ((121, img, 'Input'), (122, dst, 'Output')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
plt.show()

### Image Thresholding

With binary thresholding, a pixel is set to 0 if its value does not exceed the threshold; otherwise it is set to the max value. There are several other thresholding types as well. The input image should be a grayscale image.
The function returns two values: the first is the threshold that was used and the second is the thresholded image.

The second argument is the threshold value and the third argument is the max value.

In [None]:
# Apply the five basic threshold types with threshold 127 and max value 255, then plot them.
# The image is read as grayscale: thresholding is meant for single-channel input and the
# 'gray' colormap used below only applies to 2-D arrays.
img = cv.imread('starry_night.png', cv.IMREAD_GRAYSCALE)
ret, thresh1 = cv.threshold(img, 127, 255, cv.THRESH_BINARY)
ret,thresh2 = cv.threshold(img,127,255,cv.THRESH_BINARY_INV)
ret,thresh3 = cv.threshold(img,127,255,cv.THRESH_TRUNC)
ret,thresh4 = cv.threshold(img,127,255,cv.THRESH_TOZERO)
ret,thresh5 = cv.threshold(img,127,255,cv.THRESH_TOZERO_INV)

titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV']
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]

# 2x3 grid: the original followed by one panel per threshold type, without axis ticks
for i in range(6):
    plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')
    plt.title(titles[i])
    plt.xticks([]),plt.yticks([])
plt.show()

Another type of thresholding is adaptive thresholding, which determines the threshold for each pixel based on a small region around it. It is useful when different parts of the image have different illumination.

Block size (second-to-last argument) determines size of neighborhood and C(last argument) is the constant to be subtracted

In [None]:
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt

# Read as grayscale and smooth with a median filter before thresholding
img = cv.imread('starry_night.png', cv.IMREAD_GRAYSCALE)
img = cv.medianBlur(img, 5)

# one global threshold, then the two adaptive variants (block size 11, constant C = 2)
ret, th1 = cv.threshold(img, 127, 255, cv.THRESH_BINARY)
th2 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 2)
th3 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2)

titles = ['Original Image', 'Global Thresholding (v = 127)',
          'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding']
images = [img, th1, th2, th3]

# 2x2 grid of the four images, without axis ticks
for panel_no, (panel_title, panel_img) in enumerate(zip(titles, images), start=1):
    plt.subplot(2, 2, panel_no)
    plt.imshow(panel_img, 'gray')
    plt.title(panel_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

The last type of thresholding is Otsu's binarization, which determines the threshold value automatically so that you do not have to specify one yourself.

In [None]:
# Read as grayscale
img = cv.imread('starry_night.png', cv.IMREAD_GRAYSCALE)

# global thresholding with a fixed threshold
ret1, th1 = cv.threshold(img, 127, 255, cv.THRESH_BINARY)

# Otsu's thresholding (the threshold value passed in can be chosen arbitrarily)
ret2, th2 = cv.threshold(img, 127, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)

# Otsu's thresholding after Gaussian filtering: blurring with a Gaussian kernel removes noise first
blur = cv.GaussianBlur(img, (5, 5), 0)
ret3, th3 = cv.threshold(blur, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)

# each row of the figure is: source image, its histogram, thresholded result
# (the 0 entries are placeholders for the histogram column)
images = [img, 0, th1,
          img, 0, th2,
          blur, 0, th3]
titles = ['Original Noisy Image','Histogram','Global Thresholding (v=127)',
          'Original Noisy Image','Histogram',"Otsu's Thresholding",
          'Gaussian filtered Image','Histogram',"Otsu's Thresholding"]
for row_no in range(3):
    first = 3 * row_no
    source_img, result_img = images[first], images[first + 2]

    plt.subplot(3, 3, first + 1)
    plt.imshow(source_img, 'gray')
    plt.title(titles[first])
    plt.xticks([])
    plt.yticks([])

    plt.subplot(3, 3, first + 2)
    plt.hist(source_img.ravel(), 256)
    plt.title(titles[first + 1])
    plt.xticks([])
    plt.yticks([])

    plt.subplot(3, 3, first + 3)
    plt.imshow(result_img, 'gray')
    plt.title(titles[first + 2])
    plt.xticks([])
    plt.yticks([])
plt.show()

### Smoothing Images

Images can be filtered with various Low Pass Filters (LPF) which help in removing noise and High Pass Filters (HPF) which help in detecting edges in images

In [None]:
# 2D convolution (image filtering) with a normalised 10x10 averaging kernel

img = cv.imread('opencv_logo.png')

box_size = 10
kernel = np.ones((box_size, box_size), dtype=np.float32) / (box_size * box_size)
dst = cv.filter2D(img, -1, kernel)

# original and filtered image side by side, without axis ticks
for plot_pos, plot_img, plot_title in ((121, img, 'Original'), (122, dst, 'Averaging')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

#### Image Blurring (Image Smoothing)

In [None]:
# 1. Averaging

# cv.blur() or cv.boxFilter() can be used
# the second argument is the size of the kernel used for the blurring
blur_kernel_size = (5, 5)
blur = cv.blur(img, blur_kernel_size)

# original and blurred image side by side, without axis ticks
for plot_pos, plot_img, plot_title in ((121, img, 'Original'), (122, blur, 'Blurred')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
# 2. Gaussian Blurring --> very effective in removing Gaussian noise

# the third argument is sigmaX, the standard deviation in X direction; sigmaY can be given as 4th arg
# if sigmaY is not specified, it is taken to be the same as sigmaX
gauss_kernel_size = (5, 5)
blur = cv.GaussianBlur(img, gauss_kernel_size, 0)

# original and blurred image side by side, without axis ticks
for plot_pos, plot_img, plot_title in ((121, img, 'Original'), (122, blur, 'Blurred')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
# 3. medianBlur --> takes the median of all pixels under the kernel area and replaces the central element with it

median = cv.medianBlur(img, 5)

# Show the median-filtered result itself; plotting 'blur' here would re-display the output
# of the previous (Gaussian) cell instead of this filter's output
plt.subplot(121),plt.imshow(img),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(median),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()

In [None]:
# 4. Bilateral Filtering --> highly effective at removing noise while keeping edges sharp

# the 3rd and 4th args are sigma colour and sigma space
sigma_colour, sigma_space = 75, 75
blur = cv.bilateralFilter(img, 9, sigma_colour, sigma_space)

# original and filtered image side by side, without axis ticks
for plot_pos, plot_img, plot_title in ((121, img, 'Original'), (122, blur, 'Blurred')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img)
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

### Morphological Transformations

#### Erosion
It erodes away the boundaries of the foreground object. Method --> A pixel in the original image will be considered 1 only if all the pixels under the kernel are also 1; otherwise it is eroded (set to 0).

In [None]:
import numpy as np
import cv2 as cv

# Read the image as grayscale and erode it once with a 5x5 kernel of ones
img = cv.imread('j.png', cv.IMREAD_GRAYSCALE)
kernel = np.ones(shape=(5, 5), dtype=np.uint8)
erosion = cv.erode(img, kernel, iterations=1)

# show the result until a key is pressed, then close the window
erosion_window = 'erosion'
cv.imshow(erosion_window, erosion)
cv.waitKey(0)
cv.destroyAllWindows()

#### Dilation
It is the opposite of erosion. Here a pixel element is 1 if at least one pixel under the kernel is 1.

In [None]:
# Dilate img once with the current kernel
dilation = cv.dilate(img, kernel, iterations = 1)

# show the result until a key is pressed, then close the window
cv.imshow('dilation', dilation)
cv.waitKey(0)
cv.destroyAllWindows()

#### Opening
It is another name of erosion followed by dilation. Useful to remove noise

In [None]:
# Apply the MORPH_OPEN operation to img with the current kernel
opening = cv.morphologyEx(img, cv.MORPH_OPEN, kernel)

# show the result until a key is pressed, then close the window
cv.imshow('opening', opening)
cv.waitKey(0)
cv.destroyAllWindows()

#### Closing
Opposite of opening: dilation followed by erosion. Useful for closing small holes inside the foreground object.

In [None]:
# Apply the MORPH_CLOSE operation to img with the current kernel
closing = cv.morphologyEx(img, cv.MORPH_CLOSE, kernel)

# show the result until a key is pressed, then close the window
cv.imshow('closing', closing)
cv.waitKey(0)
cv.destroyAllWindows()

#### Morphological Gradient
Difference between the dilation and the erosion of an image. The result looks like the outline of the object.

In [None]:
# Apply the MORPH_GRADIENT operation to img with the current kernel
gradient = cv.morphologyEx(img, cv.MORPH_GRADIENT, kernel)

# show the result until a key is pressed, then close the window
cv.imshow('gradient', gradient)
cv.waitKey(0)
cv.destroyAllWindows()

In [None]:
# there are other transformations too. You can have a look at them.

### Gradient or High-Pass filters
OpenCV provides three such filters - Sobel, Scharr and Laplacian

Sobel derivatives are Gaussian smoothing plus differentiation so it is more resistant to noise.

In [None]:
# showing all the filters
import matplotlib.pyplot as plt

# Read as grayscale: with a colour input the float64 filter outputs have three channels,
# and matplotlib would treat them as RGB data (clipping the values) and ignore the gray colormap
img = cv.imread('gradients.jpg', cv.IMREAD_GRAYSCALE)

# cv.CV_64F keeps negative derivative values that an 8-bit output could not hold
laplacian = cv.Laplacian(img, cv.CV_64F)
# Sobel: (1, 0) is the first derivative in x, (0, 1) the first derivative in y; 5x5 kernel
sobelx = cv.Sobel(img, cv.CV_64F, 1, 0, ksize = 5)
sobely = cv.Sobel(img, cv.CV_64F, 0, 1, ksize = 5)


plt.subplot(2,2,1),plt.imshow(img,cmap = 'gray')
plt.title('Original'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,2),plt.imshow(laplacian,cmap = 'gray')
plt.title('Laplacian'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,3),plt.imshow(sobelx,cmap = 'gray')
plt.title('Sobel X'), plt.xticks([]), plt.yticks([])
plt.subplot(2,2,4),plt.imshow(sobely,cmap = 'gray')
plt.title('Sobel Y'), plt.xticks([]), plt.yticks([])
plt.show()

### Edge detection

The most famous algorithm used is the Canny detection algorithm which has multiple steps:  
1) Noise Reduction  
2) Finding intensity gradient of the image  
3) Non-maximum suppression  
4) Hysteresis Thresholding  

In [None]:
# Run the Canny detector with hysteresis limits 100 (min) and 200 (max)
img = cv.imread('j.png')
canny_min_val, canny_max_val = 100, 200
edges = cv.Canny(img, canny_min_val, canny_max_val)

# input and edge map side by side, without axis ticks
for plot_pos, plot_img, plot_title in ((121, img, 'Original Image'), (122, edges, 'Edge Image')):
    plt.subplot(plot_pos)
    plt.imshow(plot_img, cmap = 'gray')
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()

In the above code, cv.Canny() wraps all the steps in one function. The 2nd and 3rd args are the minVal and maxVal used for hysteresis thresholding. The 4th arg is aperture_size, the size of the Sobel kernel used to find the image gradients (in the x and y directions). The last arg, L2gradient, selects the equation used to compute the gradient magnitude.

### Contours
Contours can be described as a curve joining all continuous points. For better accuracy, use binary images, hence apply threshold or canny edge detection

The approximation cv.CHAIN_APPROX_NONE saves all the contour boundary points while the approximation function cv.CHAIN_APPROX_SIMPLE removes redundant points.

In [None]:
import numpy as np
import cv2 as cv

# Threshold the image to a binary one and extract its contours.
img = cv.imread('j.png')
imgray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
ret, thresh = cv.threshold(imgray, 127, 255, 0)
# 2nd argument is the contour retrieval mode and 3rd is the contour approximation method
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but only (contours, hierarchy)
# on other versions; taking the last two items works everywhere and keeps 'img' as the
# colour image that the later cells draw on
contours, hierarchy = cv.findContours(thresh, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2:]

In [None]:
# the arguments from the fourth one onwards are colour, thickness etc.
# a contour index of -1 draws all the contours
contour_colour, contour_thickness = (0, 255, 0), 3
cv.drawContours(img, contours, -1, contour_colour, contour_thickness)

In [None]:
# A specific contour can also be drawn by picking it out and passing it as a one-element list
selected_contour = 4
cnt = contours[selected_contour]
cv.drawContours(img, [cnt], 0, (0, 255, 0), 3)

#### Moments
These help you to calculate important parameters like centre of mass, area, perimeter etc.

In [None]:
# Compute the moments of the first contour; M is indexed by keys such as 'm00', 'm10' and 'm01' in the next cell
cnt = contours[0]
M = cv.moments(cnt)
print(M)

In [None]:
# centroid, computed from the moments
# NOTE(review): M['m00'] can be zero for a degenerate contour, which would raise ZeroDivisionError here
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])

# area
area = cv.contourArea(cnt)

# contour perimeter
perimeter = cv.arcLength(cnt, True) # 2nd parameter specifies whether shape is a closed contour (if passed True) or just a curve

# contour approximation --> it approximates a contour shape to another shape with fewer vertices, based on precision
# uses the Douglas-Peucker algorithm
# Eg. trying to find a square but due to some problems in the image we don't get a perfect square
# epsilon is an accuracy parameter and specifies the maximum distance from contour to approximated contour
epsilon = 0.1*cv.arcLength(cnt, True)
# the function is approxPolyDP (Douglas-Peucker); 'approxPolyDB' does not exist and raised AttributeError
approx = cv.approxPolyDP(cnt, epsilon, True) # 3rd arg tells whether curve is closed or not

In [None]:
# convex hull --> it checks for convexity defects and corrects them
# syntax: hull = cv.convexHull(points[, hull[, clockwise[, returnPoints]]])
# the arg 'returnPoints' if true, returns coordinates of hull points. If false, it returns indices of contour points corresponding to hull points
hull = cv.convexHull(cnt)

# to check whether a curve is convex (curves which are always bulged out) or not
# the boolean result gets its own name so that it no longer overwrites the hull computed above
is_convex = cv.isContourConvex(cnt)

In [None]:
# bounding a rectangle

cnt = contours[0]

# straight bounding rectangle does not consider rotation of object, so is not rectangle of min area
x, y, w, h = cv.boundingRect(cnt) # x,y => top-left coord of rect and w,h => width and height of rect
cv.rectangle(img, (x,y), (x+w, y+h), (0, 255, 0), 2)

# rotated rectangle is the bounding rectangle of min area and takes rotation into account
# to draw this rectangle, we need the four points using the function cv.boxPoints()
rect = cv.minAreaRect(cnt)
box = cv.boxPoints(rect)
# np.int0 was removed in NumPy 2.0; cast the corner coordinates to integers explicitly
box = box.astype(np.int32)
cv.drawContours(img, [box], 0, (0, 0, 255), 2)

# min enclosing circle
(x,y), radius = cv.minEnclosingCircle(cnt)
center = (int(x), int(y))
radius = int(radius)
cv.circle(img, center, radius, (0, 255, 0), 2)

# fitting an ellipse --> the result describes the rotated rectangle in which the ellipse is inscribed
ellipse = cv.fitEllipse(cnt)
cv.ellipse(img, ellipse, (0, 255, 0), 2)

# fitting a line --> approximate a straight line
rows, cols = img.shape[:2]
# fitLine returns four one-element arrays; flatten them to scalars because calling int()
# on an array is deprecated in recent NumPy versions
vx, vy, x, y = cv.fitLine(cnt, cv.DIST_L2,0,0.01,0.01).flatten()
# NOTE(review): vx is 0 for a perfectly vertical line, which breaks the two divisions below
lefty = int((-x*vy/vx) + y)
righty = int(((cols-x)*vy/vx)+y)
cv.line(img,(cols-1,righty),(0,lefty),(0,255,0),2)

Some other contour properties:  

1) Aspect Ratio: ratio of width to height of bounding rectangle  
2) Extent: ratio of contour area to bounding rectangle area  
3) Solidity: ratio of contour area to convex hull area  
4) Equivalent diameter: diameter of the circle whose area is equal to contour area  
5) Orientation: angle at which object is directed  
6) Extreme Points: found as follows-  

leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0])  
rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0])  
topmost = tuple(cnt[cnt[:, :, 1].argmin()][0])  
bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0])  

In [None]:
# finding convexity defects and drawing them
# convexityDefects needs the hull as indices into the contour, so compute it here with
# returnPoints=False instead of relying on a 'hull' left over from an earlier cell
hull = cv.convexHull(cnt, returnPoints=False)
# it returns an array where each row contains [start_point, end_point, farthest_point, approximate distance to farthest_point]
defects = cv.convexityDefects(cnt, hull)

# convexityDefects returns None when the contour has no defects, so guard the loop
if defects is not None:
    for i in range(defects.shape[0]):
        s,e,f,d = defects[i,0]
        # s, e and f are indices into cnt; convert the looked-up points to tuples of plain ints
        start = tuple(int(v) for v in cnt[s][0])
        end = tuple(int(v) for v in cnt[e][0])
        far = tuple(int(v) for v in cnt[f][0])
        cv.line(img,start,end,[0,255,0],2)
        cv.circle(img,far,5,[0,0,255],-1)

cv.imshow('img',img)
cv.waitKey(0)
cv.destroyAllWindows()

In [None]:
# point polygon test --> shortest distance between a point on the image and the contour
# the result is positive if the point is inside the contour, otherwise negative
# the last arg requests the signed distance if True; if False it only reports whether the point is inside or not
query_point = (50, 50)
dist = cv.pointPolygonTest(cnt, query_point, True)

In [None]:
# match shapes --> compare two shapes/contours and return similarity metric. Lower the metric, more the similarity
# calculated based on hu-moment values

import cv2 as cv
import numpy as np

# Read both shapes as grayscale and binarise them at 127
img1 = cv.imread('star.jpg', cv.IMREAD_GRAYSCALE)
img2 = cv.imread('star2.jpg', cv.IMREAD_GRAYSCALE)
ret, thresh = cv.threshold(img1, 127, 255, cv.THRESH_BINARY)
ret, thresh2 = cv.threshold(img2, 127, 255, cv.THRESH_BINARY)

# take the first contour found in each image (the numeric modes 2 and 1 written as named constants)
contours, hierarchy = cv.findContours(thresh, cv.RETR_CCOMP, cv.CHAIN_APPROX_NONE)
cnt1 = contours[0]
contours, hierarchy = cv.findContours(thresh2, cv.RETR_CCOMP, cv.CHAIN_APPROX_NONE)
cnt2 = contours[0]

# compare the two contours (method 1 written as its named constant) and print the metric
ret = cv.matchShapes(cnt1, cnt2, cv.CONTOURS_MATCH_I1, 0.0)
print( ret )

Contour hierarchy in OpenCV:  
In some cases, some shapes can be inside other shapes, just like nested figure. In this case, we call the outer shape as the parent and the inner contour as the child.  

Hierarchy Representation in OpenCV: [Next, Previous, First_Child, Parent]  
Next is the next contour in same hierarchy level  
Previous is the previous contour in same hierarchy level  
First_Child is the first child of the contour  
Parent is the parent of the contour  

### Histogram

You can consider histogram as a graph or plot, which gives you an overall idea about the intensity distribution of an image. It is a plot with pixel values (ranging from 0 to 255, not always) in X-axis and corresponding number of pixels in the image on Y-axis.

In [None]:
img = cv.imread('starry_night.png')
# 2nd arg: channel for which the histogram is wanted; for a coloured image it can be [0], [1] or [2]
hist_channel = [0]
# 4th arg: histSize, that is, the number of bins
hist_bins = [256]
# 5th arg: the range of intensity values to measure; normally it is [0, 256]
hist_value_range = [0, 256]
hist = cv.calcHist([img], hist_channel, None, hist_bins, hist_value_range)

In [None]:
# plotting the histogram

# matplotlib way: flatten the image and let plt.hist count the values into 256 bins over [0, 256]
import matplotlib.pyplot as plt
# 'range' is passed by keyword: newer Matplotlib releases deprecate giving it positionally
plt.hist(img.ravel(), bins=256, range=(0, 256))
plt.show()

In [None]:
# One histogram curve per channel, drawn in the colour of that channel (b, g, r order)
color = ('b','g','r')
for channel_no, col in zip(range(len(color)), color):
    histr = cv.calcHist([img], [channel_no], None, [256], [0, 256])
    plt.plot(histr, color = col)
    plt.xlim([0, 256])
plt.show()

In [None]:
# there is also an OpenCV way to plot...

In [None]:
# application of a mask --> histogram of only a portion of the image

# the mask is zero everywhere except a rectangle set to 255
mask = np.zeros(img.shape[:2], dtype=np.uint8)
mask[100:300, 100:400] = 255
masked_img = cv.bitwise_and(img, img, mask = mask)

# histogram without the mask and with it (the third argument is the mask)
hist_full = cv.calcHist([img], [0], None, [256], [0, 256])
hist_mask = cv.calcHist([img], [0], mask, [256], [0, 256])

# top row: image and mask; bottom row: masked image and both histograms
for plot_pos, plot_img in ((221, img), (222, mask), (223, masked_img)):
    plt.subplot(plot_pos)
    plt.imshow(plot_img, 'gray')
plt.subplot(224)
plt.plot(hist_full)
plt.plot(hist_mask)
plt.xlim([0, 256])
plt.show()

#### Histogram Equalization

It improves the contrast of the image by stretching the histogram on either ends if it is confined to specific values

In [None]:
import numpy as np
import cv2 as cv

# Equalise the grayscale image and save input and result next to each other
img = cv.imread('face.jpg', cv.IMREAD_GRAYSCALE)
equ = cv.equalizeHist(img)
# join the two 2-D images along the column axis, i.e. side by side
res = np.concatenate((img, equ), axis=1)
cv.imwrite('res.png', res)

Histogram equalization is good in cases where all images with different lighting conditions are to be brought to a common scale. Eg. for facial recognition before training.

#### CLAHE (Contrast Limited Adaptive Histogram Equalization)

This is important in scenarios where some information might be lost due to over-brightness after applying histogram equalization. CLAHE uses tiles and equalizes the histograms of smaller regions at a time. If any histogram bin is above the specified contrast limit, those pixels are clipped and distributed uniformly to the other bins before applying histogram equalization.

In [None]:
# Build a CLAHE object (clip limit 2.0, 8x8 tile grid), apply it and save the result
clahe_settings = dict(clipLimit = 2.0, tileGridSize = (8, 8))
clahe = cv.createCLAHE(**clahe_settings)
cl1 = clahe.apply(img)

cv.imwrite('res.png', cl1)

#### 2D histogram
Earlier we were dealing with 1D histograms as they had only one feature: intensity for every pixel. In 2D histograms, we use two features, namely hue and saturation for every pixel

In [None]:
# Convert to HSV, since the 2D histogram is built over hue and saturation
img = cv.imread('starry_night.png')
hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)

# channels 0 and 1 with 180 bins for hue and 256 bins for saturation
hue_sat_channels = [0, 1]
hue_sat_bins = [180, 256]
hue_sat_ranges = [0, 180, 0, 256]
hist = cv.calcHist([hsv], hue_sat_channels, None, hue_sat_bins, hue_sat_ranges)

In [None]:
# matplotlib function to display the 2D histogram as an image ('nearest' interpolation is requested)
plt.imshow(hist,interpolation = 'nearest')
plt.show()

#### Histogram Backprojection
In simple words, it creates an image of the same size (but single channel) as that of our input image, where each pixel corresponds to the probability of that pixel belonging to our object. Put even more simply, the output image will show our object of interest in brighter white than the remaining part.

In [None]:
# roi supplies the colour histogram; target is the image that gets back-projected
roi = cv.imread('starry_night.png')
hsv = cv.cvtColor(roi, cv.COLOR_BGR2HSV)

target = cv.imread('opencv_logo.png')
hsvt = cv.cvtColor(target, cv.COLOR_BGR2HSV)

# calculating object histogram (hue and saturation channels)
roihist = cv.calcHist([hsv], [0,1], None, [180,256], [0,180,0,256])

# normalize histogram and apply backprojection
# cv.normalize takes (src, dst, alpha, beta, norm_type): the dst argument was missing, which
# shifted every later argument by one position; normalise roihist in place to the range 0..255
cv.normalize(roihist, roihist, 0, 255, cv.NORM_MINMAX)
dst = cv.calcBackProject([hsvt], [0,1], roihist, [0, 180, 0, 256], 1) # this gives us the probability image

# now convolve with circular disc with the probability image obtained above
disc = cv.getStructuringElement(cv.MORPH_ELLIPSE, (5,5))
cv.filter2D(dst, -1, disc, dst)

# threshold and binary AND
ret, thresh = cv.threshold(dst, 50, 255, 0)
# replicate the single-channel mask to three channels so it can be AND-ed with the colour image
thresh = cv.merge((thresh, thresh, thresh))
res = cv.bitwise_and(target, thresh)

# stack target, mask and result vertically and save them as one image
res = np.vstack((target, thresh, res))
cv.imwrite('res.png', res)

### Template Matching
It is used for searching and finding the location of a template image in a larger image. It simply slides the template image over the larger image and helps find the best match

In [None]:
# Try every template-matching method and plot the response map next to the detected location.

img = cv.imread('messi5.jpg',0)
img2 = img.copy()
template = cv.imread('template.jpg',0)
w, h = template.shape[::-1]
# All the 6 methods for comparison in a list
methods = ['cv.TM_CCOEFF', 'cv.TM_CCOEFF_NORMED', 'cv.TM_CCORR',
            'cv.TM_CCORR_NORMED', 'cv.TM_SQDIFF', 'cv.TM_SQDIFF_NORMED']
for meth in methods:
    # work on a fresh copy so rectangles from earlier methods do not accumulate
    img = img2.copy()
    # Look the constant up by attribute name on the cv module instead of eval()-ing the string
    method = getattr(cv, meth.split('.', 1)[1])
    # Apply template Matching
    res = cv.matchTemplate(img,template,method)
    min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)
    # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
    if method in [cv.TM_SQDIFF, cv.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)
    cv.rectangle(img,top_left, bottom_right, 255, 2)
    plt.subplot(121),plt.imshow(res,cmap = 'gray')
    plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
    plt.subplot(122),plt.imshow(img,cmap = 'gray')
    plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
    plt.suptitle(meth)
    plt.show()

In [None]:
# template matching with multiple objects: keep every location whose score reaches a threshold

img_rgb = cv.imread('mario.png')
img_gray = cv.cvtColor(img_rgb, cv.COLOR_BGR2GRAY)
template = cv.imread('mario_coin.png', cv.IMREAD_GRAYSCALE)
# the template is 2-D, so its shape is (height, width)
h, w = template.shape
res = cv.matchTemplate(img_gray, template, cv.TM_CCOEFF_NORMED)
threshold = 0.8
loc = np.where(res >= threshold)
# loc holds (row indices, column indices); pair them up as (x, y) points
for pt in zip(loc[1], loc[0]):
    opposite_corner = (pt[0] + w, pt[1] + h)
    cv.rectangle(img_rgb, pt, opposite_corner, (0, 0, 255), 2)
cv.imwrite('res.png', img_rgb)

### Line Detection using Hough Transform

The Hough transform works on a binary edge image, so first convert the image to grayscale and apply thresholding or Canny edge detection before running it.

In [None]:
# Standard Hough transform: find straight lines in the Canny edge map of the
# image and draw each of them in red on the colour image.
img = cv.imread('sudoku.png')
# cv.imread returns None instead of raising when the file cannot be read
if img is None:
    raise IOError("Could not read 'sudoku.png'")
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
edges = cv.Canny(gray, 50, 150, apertureSize = 3)

# accumulator resolution: 1 pixel for rho and 1 degree for theta; 200 is the vote threshold
lines = cv.HoughLines(edges, 1, np.pi/180, 200)
# cv.HoughLines returns None (not an empty array) when no line is found,
# so fall back to an empty sequence instead of iterating over None
for line in (lines if lines is not None else []):
    # rho is the perpendicular distance of the line from the origin and
    # theta is the angle of that perpendicular
    rho, theta = line[0]
    a = np.cos(theta)
    b = np.sin(theta)
    # (x0, y0) is the point on the line closest to the origin
    x0 = a*rho
    y0 = b*rho
    # step 1000 px along the line direction (-b, a) both ways to get two end points
    x1 = int(x0 + 1000*(-b))
    y1 = int(y0 + 1000*(a))
    x2 = int(x0 - 1000*(-b))
    y2 = int(y0 - 1000*(a))

    cv.line(img,(x1,y1),(x2,y2),(0,0,255),2)

cv.imwrite('houghlines3.jpg',img)

### Probabilistic Hough Transform
It takes a random subset of points instead of all points for the hough computation. Makes it less expensive

In [None]:
# Probabilistic Hough transform on `edges`, drawn in green onto `img`.
# Neither name is defined in this cell; both must already exist in the kernel.
# minLineLength: minimum length of the line. Lines shorter than this are rejected
# maxLineGap: maximum allowed gap between line segments to treat them as a single line
lines = cv.HoughLinesP(edges,1,np.pi/180,100,minLineLength=100,maxLineGap=10)
# cv.HoughLinesP returns None when no segment is found; avoid iterating over None
for line in (lines if lines is not None else []):
    # each entry holds the two end points of one detected segment
    x1,y1,x2,y2 = line[0]
    cv.line(img,(x1,y1),(x2,y2),(0,255,0),2)
cv.imwrite('houghlines5.jpg',img)

### Hough Circle Transform

Parameters
image	8-bit, single-channel, grayscale input image.  

circles	Output vector of found circles. Each vector is encoded as 3 or 4 element floating-point vector (x,y,radius) or (x,y,radius,votes) .  

method	Detection method, see HoughModes. The available methods are HOUGH_GRADIENT and HOUGH_GRADIENT_ALT.  

dp	Inverse ratio of the accumulator resolution to the image resolution. For example, if dp=1 , the accumulator has the same resolution as the input image. If dp=2 , the accumulator has half as big width and height. For HOUGH_GRADIENT_ALT the recommended value is dp=1.5, unless some very small circles need to be detected.  

minDist	Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.  

param1	First method-specific parameter. In case of HOUGH_GRADIENT and HOUGH_GRADIENT_ALT, it is the higher threshold of the two passed to the Canny edge detector (the lower one is half of it). Note that HOUGH_GRADIENT_ALT uses the Scharr algorithm to compute image derivatives, so the threshold value should normally be higher, such as 300 for normally exposed and contrasty images.  

param2	Second method-specific parameter. In case of HOUGH_GRADIENT, it is the accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. Circles corresponding to the larger accumulator values will be returned first. In the case of the HOUGH_GRADIENT_ALT algorithm, this is the circle "perfectness" measure. The closer it is to 1, the better shaped the circles the algorithm selects. In most cases 0.9 should be fine. If you want to get better detection of small circles, you may decrease it to 0.85, 0.8 or even less. But then also try to limit the search range [minRadius, maxRadius] to avoid many false circles.  

minRadius	Minimum circle radius.  

maxRadius	Maximum circle radius. If <= 0, uses the maximum image dimension. If < 0, HOUGH_GRADIENT returns centers without finding the radius. HOUGH_GRADIENT_ALT always computes circle radiuses.

In [None]:
# Hough circle transform: detect circles in a blurred grayscale image and
# draw each circle outline (green) and centre (red) on a colour copy.
img = cv.imread('opencv_logo.png',0)
# cv.imread returns None instead of raising when the file cannot be read
if img is None:
    raise IOError("Could not read 'opencv_logo.png'")
# median blur before detection to suppress noise
img = cv.medianBlur(img,5)
cimg = cv.cvtColor(img,cv.COLOR_GRAY2BGR)
circles = cv.HoughCircles(img,cv.HOUGH_GRADIENT,1,100,
                            param1=50,param2=30,minRadius=0,maxRadius=0)
# cv.HoughCircles returns None when nothing is found; np.around(None) would raise
if circles is None:
    print('No circles detected')
else:
    circles = np.uint16(np.around(circles))
    for i in circles[0,:]:
        # hand plain Python ints to the drawing functions rather than NumPy scalars
        cx, cy, radius = (int(v) for v in i[:3])
        # draw the outer circle
        cv.circle(cimg,(cx,cy),radius,(0,255,0),2)
        # draw the center of the circle
        cv.circle(cimg,(cx,cy),2,(0,0,255),3)
cv.imshow('detected circles',cimg)
cv.waitKey(0)
cv.destroyAllWindows()

### Watershed Algorithm for image segmentation

Label the region which we are sure of being the foreground or object with one color (or intensity), label the region which we are sure of being background or non-object with another color and finally the region which we are not sure of anything, label it with 0. That is our marker. Then apply watershed algorithm. Then our marker will be updated with the labels we gave, and the boundaries of objects will have a value of -1.

In [None]:
# Load the image and binarise it: Otsu picks the threshold automatically
# (the 0 passed as threshold is ignored) and the result is inverted.
img = cv.imread('coins.jpg')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
ret, thresh = cv.threshold(
    gray,
    0,
    255,
    cv.THRESH_BINARY_INV | cv.THRESH_OTSU,
)

plt.imshow(thresh)

In [None]:
# Split the thresholded image into sure-background, sure-foreground and an
# "unknown" band in between.

# opening (erosion followed by dilation) with a 3x3 kernel removes small noise
kernel = np.ones((3, 3), dtype=np.uint8)
opening = cv.morphologyEx(thresh, cv.MORPH_OPEN, kernel, iterations=2)

# sure foreground: pixels whose distance transform value is at least
# 70 % of the maximum
dist_transform = cv.distanceTransform(opening, cv.DIST_L2, 5)
ret, sure_fg = cv.threshold(
    dist_transform, 0.7 * dist_transform.max(), 255, cv.THRESH_BINARY
)
sure_fg = sure_fg.astype(np.uint8)

# sure background: dilating the opened image grows the foreground, so whatever
# is still outside it is background
sure_bg = cv.dilate(opening, kernel, iterations=3)

# unknown region: inside the dilated area but not in the sure foreground
unknown = cv.subtract(sure_bg, sure_fg)

plt.imshow(dist_transform)

In [None]:
# Build the marker image for watershed.
# cv.connectedComponents() gives the background label 0 and numbers the other
# components from 1, but watershed treats 0 as "unknown", so the labels are
# shifted up by one before the unknown band is written in.
ret, markers = cv.connectedComponents(sure_fg)

# shift every label by one: the sure background becomes 1 instead of 0
markers = markers + 1

# pixels in the unknown band get label 0
markers[np.equal(unknown, 255)] = 0

In [None]:
# now apply watershed
# the marker image will be modified and the boundary region will be marked with -1
markers = cv.watershed(img, markers)
# paint the boundaries; img is in OpenCV's BGR order, so [255, 0, 0] is blue
img[markers == -1] = [255, 0, 0]

# matplotlib interprets 3-channel arrays as RGB, so convert from BGR for
# display; otherwise the red and blue channels appear swapped
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))

#### Interactive foreground extraction using Grabcut Algorithm

In [None]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt

# GrabCut foreground extraction initialised from a rectangle.
img = cv.imread('messi.jpg')
# cv.imread returns None instead of raising when the file cannot be read
if img is None:
    raise IOError("Could not read 'messi.jpg'")
# per-pixel label image, filled in by grabCut
mask = np.zeros(img.shape[:2], np.uint8)

# used by the algorithm internally
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)

# rectangle enclosing the foreground object, as (x, y, width, height)
rect = (50, 50, 450, 290)
# 5 iterations, initialised from the rectangle
cv.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv.GC_INIT_WITH_RECT)

# sure background (0) and probable background (2) pixels become 0, the rest 1
mask2 = np.where((mask == cv.GC_PR_BGD)|(mask == cv.GC_BGD), 0, 1).astype('uint8')
# zero out the background pixels in every colour channel
img = img*mask2[:,:,np.newaxis]

# matplotlib interprets 3-channel arrays as RGB, so convert from OpenCV's BGR
# for display; otherwise the red and blue channels appear swapped
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
plt.colorbar()
plt.show()

You can improve the above image by marking sure foreground with white and sure background with black in the image on Paint, and then re-run the algorithm with this edited image.

## Video Analysis (Video Module)

### Background Subtraction

Background modelling consists of:  
Background Initialization - initial model of background  
Background Update - adapting the model to possible changes in the background  

In [None]:
from __future__ import print_function
import cv2 as cv

# Background subtraction on the default camera with a MOG2 model: show each
# frame next to its foreground mask until the stream ends or the user presses
# 'q' or Esc.
backSub = cv.createBackgroundSubtractorMOG2()


capture = cv.VideoCapture(0)
# isOpened is a method: without the call parentheses the bound method object
# is always truthy and this check can never fire
if not capture.isOpened():
    print('Unable to open')
    exit(0)

while True:
    ret, frame = capture.read()
    if frame is None:
        break
    # update the background model and get the foreground mask for this frame
    fgMask = backSub.apply(frame)
    # white box in the top-left corner with the current frame position on it
    cv.rectangle(frame, (10, 2), (100,20), (255,255,255), -1)
    cv.putText(frame, str(capture.get(cv.CAP_PROP_POS_FRAMES)), (15, 15),
               cv.FONT_HERSHEY_SIMPLEX, 0.5 , (0,0,0))


    cv.imshow('Frame', frame)
    cv.imshow('FG Mask', fgMask)

    # give the user a way out of the loop: 'q' or Esc
    keyboard = cv.waitKey(30) & 0xFF
    if keyboard == ord('q') or keyboard == 27:
        break

# release resources once, after the loop; destroying the windows inside the
# loop would tear them down and recreate them on every frame
capture.release()
cv.destroyAllWindows()

### Meanshift
An algorithm to track objects in videos by continuously moving a window to area with highest pixel density

In [None]:
import numpy as np
import cv2 as cv

# Meanshift tracking: build a hue histogram of a fixed region in the first
# frame, then follow that region through the stream via histogram
# back-projection. Press Esc to stop.
cap = cv.VideoCapture(0)

# take first frame of video
ret, frame = cap.read()
if not ret:
    # without a first frame there is nothing to build the ROI histogram from
    cap.release()
    raise RuntimeError('Could not read the first frame from the camera')

# setup initial location of window
x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values
track_window = (x, y, w, h)

# set up the ROI for tracking
roi = frame[y:y+h, x:x+w]
hsv_roi =  cv.cvtColor(roi, cv.COLOR_BGR2HSV)
# only pixels with S >= 60 and V >= 32 contribute to the histogram
mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
# 180-bin histogram of the hue channel, scaled to 0..255
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)

# Setup the termination criteria, either 10 iterations or move by at least 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )

while True:
    ret, frame = cap.read()
    if not ret:
        break

    hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
    dst = cv.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

    # apply meanshift to get the new location
    ret, track_window = cv.meanShift(dst, track_window, term_crit)

    # Draw it on image
    x,y,w,h = track_window
    img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2)
    cv.imshow('img2',img2)

    # Esc stops the tracking loop
    k = cv.waitKey(30) & 0xFF
    if k == 27:
        break
cv.destroyAllWindows()
cap.release()

### Camshift (Continuously Adaptive Mean Shift)
It updates the size of the window by applying meanshift first and then updating size of window through a formula

In [None]:
import numpy as np
import cv2 as cv

# CamShift tracking: same hue-histogram setup as meanshift, but the tracker
# returns a rotated rectangle that is drawn as a polygon. Press Esc to stop.
cap = cv.VideoCapture(0)
# take first frame of the video
ret,frame = cap.read()
if not ret:
    # without a first frame there is nothing to build the ROI histogram from
    cap.release()
    raise RuntimeError('Could not read the first frame from the camera')
# setup initial location of window
x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values
track_window = (x, y, w, h)
# set up the ROI for tracking
roi = frame[y:y+h, x:x+w]
hsv_roi =  cv.cvtColor(roi, cv.COLOR_BGR2HSV)
# only pixels with S >= 60 and V >= 32 contribute to the histogram
mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180])
cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX)
# Setup the termination criteria, either 10 iterations or move by at least 1 pt
term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 )
while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
    dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1)
    # apply camshift to get the new location; here ret is the rotated rectangle
    ret, track_window = cv.CamShift(dst, track_window, term_crit)
    # Draw it on image
    pts = cv.boxPoints(ret)
    # np.int0 was removed in NumPy 2.0; cast the corner points to int32 explicitly
    pts = pts.astype(np.int32)
    img2 = cv.polylines(frame,[pts],True, 255,2)
    # The frame is shown unflipped. cv.flip returns a new image, so to flip
    # the display its result would have to be assigned: img2 = cv.flip(img2, -1)
    cv.imshow('img2',img2)
    # Esc stops the tracking loop
    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
cv.destroyAllWindows()
cap.release()

### Optical Flow
It is the pattern of apparent motion of objects between two consecutive frames

In [None]:
# Lucas-Kanade Optical Flow

import numpy as np
import cv2 as cv

# Sparse optical flow: detect Shi-Tomasi corners in the first frame and track
# them from frame to frame with pyramidal Lucas-Kanade, drawing each track in
# its own colour. Press Esc to stop.
cap = cv.VideoCapture(0)
# params for ShiTomasi corner detection
feature_params = dict( maxCorners = 100,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
# Parameters for lucas kanade optical flow
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
# Create some random colors (one row per possible corner, see maxCorners)
color = np.random.randint(0,255,(100,3))
# Take first frame and find corners in it
ret, old_frame = cap.read()
if not ret:
    # without a first frame there are no corners to track
    cap.release()
    raise RuntimeError('Could not read the first frame from the camera')
old_gray = cv.cvtColor(old_frame, cv.COLOR_BGR2GRAY)
p0 = cv.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
# Create a mask image for drawing purposes
mask = np.zeros_like(old_frame)
# stop when there is nothing (left) to track: goodFeaturesToTrack returns None
# if it finds no corner, and every point may be lost along the way
while p0 is not None and len(p0) > 0:
    ret,frame = cap.read()
    if not ret:
        break
    frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # calculate optical flow
    p1, st, err = cv.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
    if p1 is None:
        break
    # Select good points (status 1 means the flow for that point was found)
    good_new = p1[st==1]
    good_old = p0[st==1]
    # draw the tracks
    for i,(new,old) in enumerate(zip(good_new, good_old)):
        # the tracked positions are floats; the drawing functions need integer pixels
        a,b = (int(v) for v in new.ravel())
        c,d = (int(v) for v in old.ravel())
        mask = cv.line(mask, (a,b),(c,d), color[i].tolist(), 2)
        frame = cv.circle(frame,(a,b),5,color[i].tolist(),-1)
    img = cv.add(frame,mask)
    # The frame is shown unflipped. cv.flip returns a new image, so to flip
    # the display its result would have to be assigned: img = cv.flip(img, -1)
    cv.imshow('frame',img)
    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
    # Now update the previous frame and previous points
    old_gray = frame_gray.copy()
    p0 = good_new.reshape(-1,1,2)
cv.destroyAllWindows()
cap.release()

### Dense Optical Flow using Gunnar Farneback's algorithm

In [None]:
import numpy as np
import cv2 as cv
# Dense optical flow (Farneback): compute a flow vector for every pixel and
# visualise it as colour, with hue for direction and brightness for magnitude.
# Esc quits; 's' saves the current frame and its flow image.
cap = cv.VideoCapture(0)
ret, frame1 = cap.read()
if not ret:
    # without a first frame there is nothing to compare the next one against
    cap.release()
    raise RuntimeError('Could not read the first frame from the camera')
prvs = cv.cvtColor(frame1,cv.COLOR_BGR2GRAY)
hsv = np.zeros_like(frame1)
# full saturation everywhere; only hue and value change per frame
hsv[...,1] = 255
while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    # named next_gray rather than `next` so the builtin next() is not shadowed
    next_gray = cv.cvtColor(frame2,cv.COLOR_BGR2GRAY)
    flow = cv.calcOpticalFlowFarneback(prvs,next_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv.cartToPolar(flow[...,0], flow[...,1])
    # angle in radians -> degrees / 2, which fits OpenCV's 0..180 hue range
    hsv[...,0] = ang*180/np.pi/2
    hsv[...,2] = cv.normalize(mag,None,0,255,cv.NORM_MINMAX)
    bgr = cv.cvtColor(hsv,cv.COLOR_HSV2BGR)
    cv.imshow('frame2',bgr)
    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
    elif k == ord('s'):
        cv.imwrite('opticalfb.png',frame2)
        cv.imwrite('opticalhsv.png',bgr)
    prvs = next_gray
cv.destroyAllWindows()
cap.release()

### Depth map from stereo images
A disparity map refers to the apparent pixel difference or motion between two stereo images

In [None]:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt

# Load the left/right stereo pair as grayscale images
imgL = cv.imread('tsukuba_l.png', cv.IMREAD_GRAYSCALE)
imgR = cv.imread('tsukuba_r.png', cv.IMREAD_GRAYSCALE)

# Block-matching stereo: 16 disparity levels, 15x15 matching window
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL, imgR)

# Show the disparity map in grayscale
plt.imshow(disparity, cmap='gray')
plt.show()