<a href="https://colab.research.google.com/github/mawhy/OpenCV/blob/master/Object_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image Processing CookBook
## Object Detection


In [0]:
!git clone https://github.com/PacktPublishing/Python-Image-Processing-Cookbook.git
%cp -av "/content/Python-Image-Processing-Cookbook/Chapter 08/images/" "/content/"
%cp -av "/content/Python-Image-Processing-Cookbook/Chapter 08/models/" "/content/"
%rm -rf "/content/Python-Image-Processing-Cookbook"

### People Detection with HOG/SVM

In [0]:
import numpy as np
import cv2
import matplotlib.pylab as plt

# Load the test image; cv2.imread returns None instead of raising when the file is missing.
img = cv2.imread("images/walk.png")
if img is None:
    raise FileNotFoundError("could not read images/walk.png")
# create HOG descriptor using default people (pedestrian) detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# run detection, using a spatial stride of 4 pixels (horizontal and vertical), a scale stride of 1.02, and zero grouping of rectangles (to
# demonstrate that HOG will detect at potentially multiple places in the scale pyramid).
# NOTE: scale must be 1.02 here to match the description above and the recorded count below;
# the same call with scale=1.1 is the one used later in this notebook (recorded count: 70).
(found_bounding_boxes, weights) = hog.detectMultiScale(img, winStride=(4, 4), padding=(8, 8), scale=1.02, finalThreshold=0)
print(len(found_bounding_boxes)) # number of boundingboxes
# 314
# copy the original image to draw bounding boxes on it for now, as we'll use it again later
img_with_waw_bboxes = img.copy()
for (hx, hy, hw, hh) in found_bounding_boxes:
    # each detection is (x, y, width, height); draw it in red (BGR) with a 2 px border
    cv2.rectangle(img_with_waw_bboxes, (hx, hy), (hx + hw, hy + hh), (0, 0, 255), 2)
plt.figure(figsize=(20, 12))
# matplotlib expects RGB while OpenCV images are BGR
img_with_waw_bboxes = cv2.cvtColor(img_with_waw_bboxes, cv2.COLOR_BGR2RGB)
plt.imshow(img_with_waw_bboxes, aspect='auto'), plt.axis('off')
plt.title('Boundingboxes found by HOG-SVM without grouping', size=20)
plt.show()

#https://gist.github.com/CMCDragonkai/1be3402e261d3c239a307a3346360506
def non_max_suppression(boxes, scores, threshold):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        boxes: array of shape (N, 4); columns 0/1 are the origin corner and
            columns 2/3 the opposite corner (the same layout ``compute_iou``
            indexes).
        scores: array of shape (N,) with one score per box.
        threshold: IoU value above which a lower-scoring box is suppressed.

    Returns:
        Integer numpy array with the indices of the kept boxes, ordered from
        highest to lowest score. It is an integer array even when empty, so
        it can always be used directly as an index into ``boxes``.
    """
    assert boxes.shape[0] == scores.shape[0]
    # origin corner
    ys1 = boxes[:, 0]
    xs1 = boxes[:, 1]
    # opposite corner
    ys2 = boxes[:, 2]
    xs2 = boxes[:, 3]
    # areas are plain side-length products (no +1 for inclusive pixel ranges)
    areas = (ys2 - ys1) * (xs2 - xs1)
    # ascending by score, so pop() yields the best remaining candidate
    scores_indexes = scores.argsort().tolist()
    boxes_keep_index = []
    while scores_indexes:
        index = scores_indexes.pop()
        boxes_keep_index.append(index)
        if not scores_indexes:
            break
        ious = compute_iou(boxes[index], boxes[scores_indexes], areas[index],
                           areas[scores_indexes])
        suppressed = ious > threshold
        # keep only the candidates that do not overlap the kept box too much
        scores_indexes = [
            v for (v, drop) in zip(scores_indexes, suppressed) if not drop
        ]
    # dtype=int: np.array([]) would default to float64, which cannot be used for indexing
    return np.array(boxes_keep_index, dtype=int)


def compute_iou(box, boxes, box_area, boxes_area):
    """Intersection-over-union of one box against an array of boxes.

    Args:
        box: sequence of 4 coordinates; indices 0/1 are the origin corner and
            2/3 the opposite corner.
        boxes: array of shape (M, 4) with the same column layout.
        box_area: area of ``box``.
        boxes_area: array of shape (M,) with the area of each row in ``boxes``.

    Returns:
        Float array of shape (M,) with the IoU of ``box`` against each row.
        Where the union is not positive (e.g. two zero-area boxes) the IoU is
        0 rather than NaN.
    """
    assert boxes.shape[0] == boxes_area.shape[0]
    # intersection origin: the larger of the two origin coordinates
    ys1 = np.maximum(box[0], boxes[:, 0])
    xs1 = np.maximum(box[1], boxes[:, 1])
    # intersection target: the smaller of the two opposite-corner coordinates
    ys2 = np.minimum(box[2], boxes[:, 2])
    xs2 = np.minimum(box[3], boxes[:, 3])
    # negative side lengths mean "no overlap"; clamp them to 0
    intersections = np.maximum(ys2 - ys1, 0) * np.maximum(xs2 - xs1, 0)
    # union = area(box) + area(other) - intersection
    unions = box_area + boxes_area - intersections
    # element-wise division, leaving 0 where the union is not positive
    ious = np.zeros(np.shape(unions), dtype=float)
    np.divide(intersections, unions, out=ious, where=unions > 0)
    return ious

# Ungrouped HOG detection again, this time post-processed with non-max suppression.
(found_bounding_boxes, weights) = hog.detectMultiScale(img, winStride=(4, 4), padding=(8, 8), scale=1.1, finalThreshold=0)
print(len(found_bounding_boxes)) # number of boundingboxes
# 70
# (x, y, w, h) -> (x1, y1, x2, y2): add the origin to the size columns
found_bounding_boxes[:, 2:4] += found_bounding_boxes[:, 0:2]
boxIndices = non_max_suppression(found_bounding_boxes, weights.ravel(), threshold=0.2)
found_bounding_boxes = found_bounding_boxes[boxIndices,:]
# back to (x, y, w, h) for drawing
found_bounding_boxes[:, 2:4] -= found_bounding_boxes[:, 0:2]
print(len(found_bounding_boxes)) # number of boundingboxes
# 4
# draw on a copy so the original image stays untouched for later use
img_with_waw_bboxes = img.copy()
for (hx, hy, hw, hh) in found_bounding_boxes:
    top_left, bottom_right = (hx, hy), (hx + hw, hy + hh)
    cv2.rectangle(img_with_waw_bboxes, top_left, bottom_right, (0, 0, 255), 1)
plt.figure(figsize=(20, 12))
img_with_waw_bboxes = cv2.cvtColor(img_with_waw_bboxes, cv2.COLOR_BGR2RGB)
plt.imshow(img_with_waw_bboxes, aspect='auto')
plt.axis('off')
plt.title('Boundingboxes found by HOG-SVM after non-max-suppression', size=20)
plt.show()

# Let OpenCV merge duplicate detections of the same object via mean-shift grouping.
(found_bounding_boxes, weights) = hog.detectMultiScale(img, winStride=(4, 4), padding=(8, 8), scale=1.01, useMeanshiftGrouping=True)
print(len(found_bounding_boxes)) # number of boundingboxes
# 3
# draw on a copy so the original image stays untouched for later use
img_with_waw_bboxes = img.copy()
for (hx, hy, hw, hh) in found_bounding_boxes:
    top_left, bottom_right = (hx, hy), (hx + hw, hy + hh)
    cv2.rectangle(img_with_waw_bboxes, top_left, bottom_right, (0, 0, 255), 1)
plt.figure(figsize=(20, 12))
img_with_waw_bboxes = cv2.cvtColor(img_with_waw_bboxes, cv2.COLOR_BGR2RGB)
plt.imshow(img_with_waw_bboxes, aspect='auto')
plt.axis('off')
plt.title('Boundingboxes found by HOG-SVM with meanshift grouping', size=20)
plt.show()

### Object Detection with Yolo V3 (OpenCV-python)
Note: downloading the YOLOv3 weights file takes a long time.

In [0]:
!wget https://pjreddie.com/media/files/yolov3.weights

In [0]:
# https://pjreddie.com/darknet/yolo/
import cv2
import numpy as np
import matplotlib.pylab as plt
from PIL import Image, ImageDraw, ImageFont
import colorsys
from random import shuffle

# Detection hyper-parameters
conf_threshold = 0.5  # minimum class confidence for a detection to be kept
nms_threshold = 0.4   # overlap threshold handed to non-maximum suppression
width = 416           # network input width
height = 416          # network input height

# Class names, one per line
classes_file = "models/yolov3/coco_classes.txt"
classes = None
with open(classes_file, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Darknet configuration and weight files
model_configuration = "models/yolov3/yolov3.cfg"
model_weights = "yolov3.weights"

# Build the network and run it with the OpenCV backend on the CPU
net = cv2.dnn.readNetFromDarknet(model_configuration, model_weights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Get the names of the output layers
def get_output_layers(net):
    """Return the names of the network's output layers.

    Output layers are those reported by ``net.getUnconnectedOutLayers()``,
    which yields 1-based indices into ``net.getLayerNames()``.

    The indices are flattened first because, depending on the OpenCV version,
    they arrive either as an (N, 1) array or as a flat (N,) array; indexing
    ``i[0]`` only works for the former.
    """
    layer_names = net.getLayerNames()
    out_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    # indices are 1-based, hence the -1
    return [layer_names[int(i) - 1] for i in out_ids]

# Draw the predicted bounding box
def draw_boxes(img, class_id, conf, left, top, right, bottom):
    """Draw one labelled detection on a BGR image and return the new image.

    Reads the module-level ``classes`` (label names) and ``colors``
    (per-class RGB triples with components in [0, 1]).

    Args:
        img: BGR image array.
        class_id: index into ``classes`` / ``colors``.
        conf: confidence in [0, 1]; shown as a percentage in the label.
        left, top, right, bottom: box corners in pixels.

    Returns:
        A new BGR image array with the box and its label drawn.
    """
    label = "{}: {:.2f}%".format(classes[class_id], conf * 100)
    color = tuple([int(255*x) for x in colors[class_id]])
    # shift the top edge by 15 px (up when there is room, otherwise down)
    top = top - 15 if top - 15 > 15 else top + 15
    # draw with PIL (TrueType text), so convert BGR -> RGB first
    pil_im = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
    # border thickness scales with the image size
    thickness = (img.shape[0] + img.shape[1]) // 300
    font = ImageFont.truetype("images/verdana.ttf", 25)
    draw = ImageDraw.Draw(pil_im)
    if hasattr(draw, "textsize"):
        label_size = draw.textsize(label, font)
    else:
        # ImageDraw.textsize was removed in Pillow 10; use the right/bottom
        # extent of the text box anchored at (0, 0) as the label size instead
        text_box = draw.textbbox((0, 0), label, font=font)
        label_size = (text_box[2], text_box[3])
    # put the label above the box when it fits, otherwise just inside it
    if top - label_size[1] >= 0:
        text_origin = np.array([left, top - label_size[1]])
    else:
        text_origin = np.array([left, top + 1])
    # a thick border is drawn as nested 1 px rectangles
    for i in range(thickness):
        draw.rectangle([left + i, top + i, right - i, bottom - i], outline=color)
    draw.rectangle([tuple(text_origin), tuple(text_origin +  label_size)], fill=color)
    draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
    del draw
    img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)

    return img


# Remove the bounding boxes with low confidence using non-maxima suppression
def post_process(img, outs):
    """Filter the raw network outputs and draw the surviving detections.

    Reads the module-level ``conf_threshold`` and ``nms_threshold`` and calls
    ``draw_boxes`` for every detection kept by non-maximum suppression.

    Args:
        img: BGR image the detections refer to.
        outs: iterable of network output arrays; each row holds
            (center_x, center_y, width, height) as fractions of the image
            size in its first four entries and per-class scores from index 5.

    Returns:
        The image with all kept detections drawn on it.
    """
    img_height, img_width = img.shape[0], img.shape[1]

    class_ids = []
    confidences = []
    boxes = []
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                # scale the relative box to pixel coordinates
                center_x = int(detection[0] * img_width)
                center_y = int(detection[1] * img_height)
                box_width = int(detection[2] * img_width)
                box_height = int(detection[3] * img_height)
                left = int(center_x - box_width / 2)
                top = int(center_y - box_height / 2)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([left, top, box_width, box_height])

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    # NMSBoxes returns an (N, 1) array, a flat (N,) array or an empty tuple
    # depending on the OpenCV version and input; flattening handles all three
    for i in np.asarray(indices).reshape(-1):
        i = int(i)
        left, top, box_width, box_height = boxes[i]
        img = draw_boxes(img, class_ids[i], confidences[i], left, top, left + box_width, top + box_height)

    return img

# One colour per class: hue and saturation grow with the class index, value is fixed;
# the list is shuffled so the class-to-colour assignment is random.
hsv_tuples = [(x/len(classes), x/len(classes), 0.8) for x in range(len(classes))]
shuffle(hsv_tuples)
colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]

img = cv2.imread('images/mytable.png')

# keep an untouched copy of the input
orig = np.copy(img)

# Build the 4D input blob: scale pixels by 1/255, resize to the network input size,
# subtract no mean, swap R and B, do not crop.
blob = cv2.dnn.blobFromImage(img, 1/255, (width, height), [0,0,0], 1, crop=False)

# Feed the blob and run the forward pass up to the output layers
net.setInput(blob)
outs = net.forward(get_output_layers(net))

# Drop low-confidence / overlapping boxes and draw the rest
img = post_process(img, outs)

fig = plt.figure(figsize=(20,15))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title('Objects detected with Yolo (v3)', size=20)
plt.show()

### Object Detection with Faster-RCNN (TensorFlow ResNet)

In [0]:
# Reference: https://github.com/tensorflow/models/tree/master/research/object_detection

In [0]:
!wget http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz

In [0]:
!tar -xzvf faster_rcnn_resnet101_coco_2018_01_28.tar.gz

In [0]:
!wget https://github.com/JotJunior/PHP-Boleto-ZF2/raw/master/public/assets/fonts/arial.ttf

In [0]:
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
%matplotlib inline
import numpy as np
import tensorflow as tf
import cv2
from PIL import Image, ImageFont, ImageDraw
import json    
import colorsys
import matplotlib.pylab as plt

print(tf.__version__)

# Build the id -> name mapping from the "categories" section of the annotation file,
# then generate one bounding-box colour per label.
with open('models/faster_rcnn/image_info_test2017.json','r') as r:
    js = json.load(r)
labels = {category['id']: category['name'] for category in js['categories']}
print(labels)
print(len(labels))

# evenly spaced hues with fixed saturation and value
hsv_tuples = [(x/len(labels), 0.8, 0.8) for x in range(len(labels))]
colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
conf = 0.2  # minimum detection score

# Input image (BGR, as loaded by OpenCV)
img = cv2.imread('images/road.png')

# Parse the frozen inference graph from disk
with tf.io.gfile.GFile('faster_rcnn_resnet101_coco_2018_01_28/frozen_inference_graph.pb', 'rb') as f:
    graph_def = tf.compat.v1.GraphDef() #tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.compat.v1.Session() as sess: #tf.Session() as sess:
    # Restore session
    sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')

    orig = np.copy(img)

    rows = img.shape[0]
    cols = img.shape[1]
    inp = cv2.resize(img, (300, 300))
    inp = inp[:, :, [2, 1, 0]]  # BGR2RGB

    # Run the model
    out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                    sess.graph.get_tensor_by_name('detection_scores:0'),
                    sess.graph.get_tensor_by_name('detection_boxes:0'),
                    sess.graph.get_tensor_by_name('detection_classes:0')],
                   feed_dict={'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)})

    # out[0]: number of detections, out[1]: scores, out[2]: boxes, out[3]: class ids
    num_detections = int(out[0][0])
    print(num_detections)

    # loop invariants: the image size (and so the border thickness) and the font never change
    thickness = (img.shape[0] + img.shape[1]) // 300
    font = ImageFont.truetype("arial.ttf", 15)

    # Visualize detected bounding boxes.
    for i in range(num_detections):
        idx = int(out[3][0][i])
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[2][0][i]]
        # Skip weak detections entirely. Previously only the coordinate
        # computation was guarded, so rejected detections were still drawn
        # using the coordinates of an earlier box (or raised NameError).
        if not score > conf:
            continue

        # boxes are (ymin, xmin, ymax, xmax) as fractions of the image size
        x = bbox[1] * cols
        y = bbox[0] * rows
        right = bbox[3] * cols
        bottom = bbox[2] * rows

        # draw the prediction on the image
        label = "{}: {:.2f}%".format(labels[idx], score * 100)
        # `colors` has one entry per label while class ids need not be
        # contiguous, so wrap the index instead of risking an IndexError
        color = tuple(int(255 * c) for c in colors[idx % len(colors)])
        y = y - 15 if y - 15 > 15 else y + 15
        pil_im = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_im)
        if hasattr(draw, "textsize"):
            label_size = draw.textsize(label, font)
        else:
            # ImageDraw.textsize was removed in Pillow 10; use the right/bottom
            # extent of the text box anchored at (0, 0) as the label size instead
            text_box = draw.textbbox((0, 0), label, font=font)
            label_size = (text_box[2], text_box[3])
        # put the label above the box when it fits, otherwise just inside it
        if y - label_size[1] >= 0:
            text_origin = np.array([x, y - label_size[1]])
        else:
            text_origin = np.array([x, y + 1])
        # a thick border is drawn as nested 1 px rectangles; a separate loop
        # variable keeps the detection index `i` intact
        for t in range(thickness):
            draw.rectangle([x + t, y + t, right - t, bottom - t], outline=color)
        draw.rectangle([tuple(text_origin), tuple(text_origin +  label_size)], fill=color)
        draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)
        del draw
        img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)

In [0]:
# Show the annotated image (converted from BGR to RGB for matplotlib)
fig = plt.figure(figsize=(20,20))
plt.imshow(cv2.cvtColor(np.array(img), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title('Objects detected with Faster-RCNN', size=25)
plt.show()

### Object Detection with Mask-RCNN

In [0]:
!wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz

In [0]:
!tar -xzvf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz

In [0]:
# http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
# https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py
# https://github.com/opencv/opencv_extra/tree/master/testdata/dnn
import cv2
import numpy as np
import os.path
import sys
import random
import matplotlib.pylab as plt

print(cv2.__version__)

# Thresholds used by the Mask-RCNN post-processing below
conf_threshold, mask_threshold = 0.5, 0.3  # detection confidence, per-pixel mask cut-off

# Draw the predicted bounding box, colorize and show the mask on the image
def draw_box(img, class_id, conf, left, top, right, bottom, class_mask):
    """Draw one detection (box, label, tinted mask and mask contours) onto ``img`` in place.

    Reads the module-level ``classes``, ``colors`` and ``mask_threshold``.
    The mask colour is picked at random from ``colors`` on every call.
    """
    # Bounding box
    cv2.rectangle(img, (left, top), (right, bottom), (255, 178, 50), 3)

    # Label text: the confidence, prefixed by the class name when names are available
    label = '%.2f' % conf
    if classes:
        assert(class_id < len(classes))
        label = '%s:%s' % (classes[class_id], label)

    # White background plate plus the label at the top of the box;
    # `top` is pushed down if the label would not fit above it
    label_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, label_size[1])
    plate_top_left = (left, top - round(1.5*label_size[1]))
    plate_bottom_right = (left + round(1.5*label_size[0]), top + baseline)
    cv2.rectangle(img, plate_top_left, plate_bottom_right, (255, 255, 255), cv2.FILLED)
    cv2.putText(img, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

    # Resize the mask to the box size and threshold it
    class_mask = cv2.resize(class_mask, (right - left + 1, bottom - top + 1))
    mask = (class_mask > mask_threshold)
    region = img[top:bottom+1, left:right+1]  # view into img
    roi = region[mask]

    # Random instance colour
    color_index = random.randint(0, len(colors)-1)
    color = colors[color_index]

    # Blend 30% colour with 70% of the original pixels under the mask
    tint = [0.3 * color[k] for k in range(3)]
    region[mask] = (tint + 0.7 * roi).astype(np.uint8)

    # Draw the contours on the image
    mask = mask.astype(np.uint8)
    contours, hierarchy = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(region, contours, -1, color, 3, cv2.LINE_8, hierarchy, 100)

# For each img, extract the bounding box and mask for each detected object
def post_process(boxes, masks):
    """Draw every sufficiently confident detection onto the module-level ``img``.

    ``boxes`` is indexed as ``boxes[0, 0, i]``; entry 1 of a row is the class
    id, entry 2 the score and entries 3..6 the left/top/right/bottom corners
    as fractions of the image size. ``masks[i][class_id]`` is the mask passed
    on to ``draw_box``. Reads the module-level ``conf_threshold``.
    """
    num_classes = masks.shape[1]
    num_detections = boxes.shape[2]

    height, width = img.shape[0], img.shape[1]

    def clamp(value, upper):
        # keep a pixel coordinate inside [0, upper]
        return max(0, min(value, upper))

    for i in range(num_detections):
        box = boxes[0, 0, i]
        mask = masks[i]
        score = box[2]
        if not score > conf_threshold:
            continue

        class_id = int(box[1])

        # Scale the relative corners to pixels and clip them to the image
        left = clamp(int(width * box[3]), width - 1)
        top = clamp(int(height * box[4]), height - 1)
        right = clamp(int(width * box[5]), width - 1)
        bottom = clamp(int(height * box[6]), height - 1)

        # Mask belonging to the predicted class
        class_mask = mask[class_id]

        # Draw bounding box, colorize and show the mask on the image
        draw_box(img, class_id, score, left, top, right, bottom, class_mask)


# Class names, one per line
classesFile = "models/mask_rcnn/mscoco_labels.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Text graph description and frozen weights of the model
textGraph = "models/mask_rcnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"
model_weights = "mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"

# Build the network and run it with the OpenCV backend on the CPU
net = cv2.dnn.readNetFromTensorflow(model_weights, textGraph)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Mask colours: one "c0 c1 c2" triple per line
colors_file = "models/mask_rcnn/colors.txt"
with open(colors_file, 'rt') as f:
    colors_str = f.read().rstrip('\n').split('\n')
colors = [] #[0,0,0]
for i in range(len(colors_str)):
    rgb = colors_str[i].split(' ')
    color = np.array([float(rgb[k]) for k in range(3)])
    colors.append(color)

img = cv2.imread('images/road.png')

print(img.shape)

# untouched copy for the side-by-side comparison
orig = np.copy(img)

# 4D input blob (R and B swapped, no cropping)
blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)

# Feed the blob and fetch the two outputs: final detections and their masks
net.setInput(blob)
boxes, masks = net.forward(['detection_out_final', 'detection_masks'])

# Draw box and mask of every detected object onto img
post_process(boxes, masks)

# Timing information of the forward pass (not displayed)
t, _ = net.getPerfProfile()

fig = plt.figure(figsize=(20,20))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
plt.subplot(211)
plt.imshow(cv2.cvtColor(orig, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title('Original Image', size=20)
plt.subplot(212)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title('Objects detected with Mask-RCNN', size=20)
plt.show()

### Text Detection in images with Tesseract

In [0]:
!sudo apt install tesseract-ocr

In [0]:
!pip install pytesseract

In [0]:
!wget https://codeload.github.com/ZER-0-NE/EAST-Detector-for-text-detection-using-OpenCV/zip/master

In [0]:
!unzip master


In [0]:
# https://stackoverflow.com/questions/44619077/pytesseract-ocr-multiple-config-options
# https://codeload.github.com/ZER-0-NE/EAST-Detector-for-text-detection-using-OpenCV/zip/master
# import the necessary packages
from imutils.object_detection import non_max_suppression
import numpy as np
import pytesseract
import cv2

min_confidence = 0.5

def decode_predictions(scores, geometry, min_conf=None):
    """Turn the detector's score and geometry volumes into boxes and confidences.

    Args:
        scores: array indexed as ``scores[0, 0, y, x]`` holding the text
            probability of each feature-map cell.
        geometry: array indexed as ``geometry[0, c, y, x]``; channels 0..3 are
            the four distances used to derive the box and channel 4 is the
            rotation angle.
        min_conf: cells with a score below this value are ignored. Defaults to
            the module-level ``min_confidence`` when omitted, which keeps
            existing two-argument calls working unchanged.

    Returns:
        ``(rects, confidences)``: a list of ``(start_x, start_y, end_x, end_y)``
        integer tuples and the list of matching scores.
    """
    if min_conf is None:
        min_conf = min_confidence

    # grab the number of rows and columns from the scores volume
    (num_rows, num_cols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(num_rows):
        # scores and geometry of the current row
        scores_data = scores[0, 0, y]
        x_data0 = geometry[0, 0, y]
        x_data1 = geometry[0, 1, y]
        x_data2 = geometry[0, 2, y]
        x_data3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(num_cols):
            # ignore cells without sufficient probability
            if scores_data[x] < min_conf:
                continue

            # feature maps are 4x smaller than the input image
            (offset_x, offset_y) = (x * 4.0, y * 4.0)

            # rotation angle of the prediction
            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # height and width of the bounding box
            h = x_data0[x] + x_data2[x]
            w = x_data1[x] + x_data3[x]

            # end corner from the rotated distances, start corner from the size
            end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
            end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(scores_data[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)

# load the input image and grab the image dimensions
im = 'images/book_cover.png'
image = cv2.imread(im)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read " + im)
orig = image.copy()
(origH, origW) = image.shape[:2]

# set the new width and height (a multiple of 32) and then determine the
# ratio in change for both the width and height
width = height = 32*10 #320
(w, h) = (width, height)
rW = origW / float(w)
rH = origH / float(h)

# resize the image and grab the new image dimensions
image = cv2.resize(image, (w, h))
(H, W) = image.shape[:2]

# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

# load the pre-trained EAST text detector
print("loading EAST text detector...")
net = cv2.dnn.readNet('EAST-Detector-for-text-detection-using-OpenCV-master/frozen_east_text_detection.pb')

# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
# per-channel means of the BGR image
b, g, r = np.mean(image[...,0]), np.mean(image[...,1]), np.mean(image[...,2])
# with swapRB=True the mean must be given in (R, G, B) order, not (B, G, R)
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (r, g, b), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# decode the predictions, then  apply non-maxima suppression to
# suppress weak, overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)

padding = 0.001 #0.01 #0.5
# initialize the list of results
results = []

# loop over the bounding boxes
for (start_x, start_y, end_x, end_y) in boxes:
    # scale the bounding box coordinates based on the respective ratios
    start_x = int(start_x * rW)
    start_y = int(start_y * rH)
    end_x = int(end_x * rW)
    end_y = int(end_y * rH)

    # in order to obtain a better OCR of the text we can potentially
    # apply a bit of padding surrounding the bounding box -- here we
    # are computing the deltas in both the x and y directions
    dX = int((end_x - start_x) * padding)
    dY = int((end_y - start_y) * padding)

    # apply padding to each side of the bounding box, clipped to the image
    start_x = max(0, start_x - dX*2)
    start_y = max(0, start_y - dY*2)
    end_x = min(origW, end_x + (dX * 2))
    end_y = min(origH, end_y + (dY * 2))

    # extract the actual padded ROI
    roi = orig[start_y:end_y, start_x:end_x]
    if roi.size == 0:
        # a box that collapses after clipping has no pixels to OCR
        continue

    # Tesseract options: (1) English language, (2) --oem 1, the LSTM
    # neural-net OCR engine, and (3) --psm 11, sparse-text page
    # segmentation (find as much text as possible in no particular order)
    config = ("-l eng --oem 1 --psm 11")
    text = pytesseract.image_to_string(roi, config=config)
    #print(text)

    # add the bounding box coordinates and OCR'd text to the list
    # of results
    results.append(((start_x, start_y, end_x, end_y), text))

# sort the results bounding box coordinates from top to bottom
# (once, after all boxes are collected, instead of on every iteration)
results = sorted(results, key=lambda r:r[0][1])
 
print(len(results))
# Annotate a copy of the original image with every recognised text region
output = orig.copy()
i = 1
for ((start_x, start_y, end_x, end_y), text) in results:
    # raw text as returned by Tesseract
    print(text)

    # OpenCV can only render ASCII, so drop every other character before
    # drawing the text and the box around the region
    text = "".join(c for c in text if ord(c) < 128).strip()
    cv2.rectangle(output, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
    text_anchor = (start_x, start_y - 20)
    cv2.putText(output, text, text_anchor, cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)

    i += 1
# save next to the inputs as images/text_<input file name>
cv2.imwrite("images/text_" + im.rsplit('/', 1)[-1], output)

![](images/text_book_cover.png)

### Multiple Object Tracking with opencv-python

In [0]:
# Replace the installed OpenCV wheels with 3.4.4.19.
# -y answers pip's confirmation prompt, which a notebook cell cannot do interactively.
!pip uninstall -y opencv-python
!pip uninstall -y opencv-contrib-python
!pip install opencv-python==3.4.4.19
!pip install opencv-contrib-python==3.4.4.19

In [0]:
# https://www.youtube.com/watch?v=whXnYIgT4P0
# https://stackoverflow.com/questions/54013403/attribute-error-multitracker-create-not-found-in-cv2-on-raspberry-pi
#print(cv2.getBuildInformation())
#pip uninstall opencv-python
#pip uninstall opencv-contrib-python
#pip install opencv-python==3.4.4.19
#pip install opencv-contrib-python==3.4.4.19
import time
import cv2
import matplotlib.pylab as plt
from imutils import resize

print(cv2.__version__)
# 3.4.4

# Create MultiTracker object
multi_tracker = cv2.MultiTracker_create()

# initial bounding boxes (x, y, width, height) of the two cars to track,
# given in the coordinates of the frame resized to width 500
car_bbox = (141,175,45,29)
car2_bbox = (295,170,55,39)
bboxes = [car_bbox, car2_bbox]
colors = [(0, 255, 255), (255, 255, 0)]

# layout of the result figure: every 4th sampled frame goes into one panel
n_rows, n_cols = 9, 2
max_panels = n_rows * n_cols

vs = cv2.VideoCapture('images/road.mp4')

_, frame = vs.read()
if frame is None:
    # read() yields None when the video cannot be opened or is empty
    vs.release()
    raise IOError("could not read a frame from images/road.mp4")
frame = resize(frame, width=500)
print(frame.shape)

# start one CSRT tracker per supplied bounding box on the first frame
for bbox in bboxes:
    multi_tracker.add(cv2.TrackerCSRT_create(), frame, bbox)

j = 0
fig = plt.figure(figsize=(20,55))
# loop over frames from the video stream
while True:
    # sample the video every 300 ms instead of reading every frame
    vs.set(cv2.CAP_PROP_POS_MSEC,(j*300))
    _, frame = vs.read()

    # stop at the end of the stream or once every panel of the grid is used
    # (">=": panel index j//4 + 1 must not exceed max_panels)
    if frame is None or j//4 >= max_panels:
        break

    # resize the frame (so we can process it faster) and grab the
    # frame dimensions
    frame = resize(frame, width=500)

    (H, W) = frame.shape[:2]

    # get updated location of objects in subsequent frames
    success, boxes = multi_tracker.update(frame)

    # check to see if the tracking was a success
    if success:
        # draw tracked objects, one colour per tracker
        for i, box in enumerate(boxes):
            p1 = (int(box[0]), int(box[1]))
            p2 = (int(box[0] + box[2]), int(box[1] + box[3]))
            cv2.rectangle(frame, p1, p2, colors[i], 2, 1)

    # show every 4th sampled frame
    if j%4 == 0:
        plt.subplot(n_rows,n_cols,j//4+1)
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)), plt.axis('off'), plt.title('Frame {}'.format(j), size=20)

    j += 1

# free the video file handle
vs.release()

plt.suptitle('Tracking cars in a video with CSRT MultiTracker', size=30)
fig.subplots_adjust(left=0, right=1, bottom=0, top=0.95, hspace=0.05, wspace=0.05)
plt.show()

### Face Detection with Viola Jones / Haar-like features and Adaboost Cascade

In [0]:
# Remove the pinned OpenCV wheels again.
# -y answers pip's confirmation prompt, which a notebook cell cannot do interactively.
!pip uninstall -y opencv-python
!pip uninstall -y opencv-contrib-python


In [0]:
!pip install opencv-python
!pip install opencv-contrib-python

In [0]:
# https://github.com/opencv/opencv/blob/master/data/haarcascades/haarcascade_smile.xml
import cv2
import numpy as np
import matplotlib.pylab as plt

# Pre-trained Haar cascades for faces, eyes and smiles
face_cascade = cv2.CascadeClassifier('models/face_detect/haarcascade_frontalface_alt2.xml')
eye_cascade = cv2.CascadeClassifier('models/face_detect/haarcascade_eye.xml') # haarcascade_eye_tree_eyeglasses.xml
smile_cascade = cv2.CascadeClassifier('models/face_detect/haarcascade_smile.xml')

img = cv2.imread('images/all.png')
# the cascades run on the grayscale image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.01, minNeighbors=8)
print(len(faces)) # number of faces detected
for (x,y,w,h) in faces:
    # face box, drawn on the full image
    img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    # eyes and smiles are searched only inside the face region;
    # roi_color is a view, so drawing on it draws on img
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.04, minNeighbors=10)
    for (ex,ey,ew,eh) in eyes:
        cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)
    smile = smile_cascade.detectMultiScale(roi_gray, scaleFactor=1.38, minNeighbors=6)
    for (mx,my,mw,mh) in smile:
        cv2.rectangle(roi_color,(mx,my),(mx+mw,my+mh),(0,0,255),2)

plt.figure(figsize=(15,20))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

### Face Detection with dlib using HOG features

In [0]:
import cv2
import dlib
# dlib works on RGB, so convert right after loading
img = cv2.cvtColor(cv2.imread('images/all.png'), cv2.COLOR_BGR2RGB)
hog_detector = dlib.get_frontal_face_detector()
# second argument: number of times the image is upsampled before detection (none here)
faces = hog_detector(img, 0)
print(len(faces)) # number of faces detected
for face in faces:
    l, t = face.left(), face.top()
    r, b = face.right(), face.bottom()
    img = cv2.rectangle(img,(l,t),(r,b),(0,0,255),2)
plt.figure(figsize=(15,20))
# img is already RGB, no conversion needed for display
plt.imshow(img)
plt.axis('off')
plt.tight_layout()
plt.show()

### Face Detection with opencv-python Single Shot MultiBox Detector pretrained deeplearning model

In [0]:
!wget https://github.com/opencv/opencv_3rdparty/raw/8033c2bc31b3256f0d461c919ecc01c2428ca03b/opencv_face_detector_uint8.pb

In [0]:
# https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/opencv_face_detector.pbtxt
# https://github.com/opencv/opencv_3rdparty/raw/8033c2bc31b3256f0d461c919ecc01c2428ca03b/opencv_face_detector_uint8.pb
import cv2

# Frozen TensorFlow face detector and its text graph description
tf_model = "opencv_face_detector_uint8.pb"
tf_config = "models/face_detect/opencv_face_detector.pbtxt"
net = cv2.dnn.readNetFromTensorflow(tf_model, tf_config)

img = cv2.imread('images/beatles.png')
h, w = img.shape[:2]
print(h,w)
# blob at the original resolution, with per-channel mean subtraction,
# no R/B swap and no cropping
blob = cv2.dnn.blobFromImage(img, 1.0, (w, h), [104, 117, 123], swapRB=False, crop=False)
net.setInput(blob)
detections = net.forward()
print(detections.shape)
bboxes = []
threshold = 0.5
# each detection row: entry 2 is the confidence, entries 3..6 the
# left/top/right/bottom corners as fractions of the image size
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if not confidence > threshold:
        continue
    print(confidence)
    l = int(detections[0, 0, i, 3] * w)
    t = int(detections[0, 0, i, 4] * h)
    r = int(detections[0, 0, i, 5] * w)
    b = int(detections[0, 0, i, 6] * h)
    img = cv2.rectangle(img,(l,t),(r,b),(0,0,255),2)
plt.figure(figsize=(15,20))
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()