# Contents
1. [Telegram Send Alert](#Telegram-Send-Alert)
2. [Postprocessing](#Prediction-and-PostProcessing)

# Telegram Send Alert

In [31]:
def send_alert(filename, alert_type, method='photo'):   # 'photo', 'document', animation
    with open('bot_cred.json','r') as json_file:
        bot_creds = json.load(json_file)
        
    files = {method:open(filename, 'rb')}
    
    resp = requests.post('https://api.telegram.org/bot' + bot_creds['bot_token'] + \
        '/send'+ method + '?chat_id=' + bot_creds['bot_chatID'] + '&caption=' + alert_type, files=files)

    return resp.status_code

In [32]:
import json
import requests

send_alert('cheers.gif', 'Testing')

200

# Prediction and PostProcessing

In [9]:
# Load Model

import tensorflow as tf
model = tf.keras.models.load_model('files/YoloV3.h5')



In [2]:
# Image preprocess function

import numpy as np

def preprocess_image(image, target_size=(416,416)):
    '''
    Processes the given image to feed the model(Rescaling by factor 1./255 and
    resizing to target_shape)

    Arguments:
    filename -- Filepath of the image to be processed
    target_shape -- Shape of the image accepted by the model(Width, Height)

    Returns:
    image -- Array of the resized input image to target_size
    width -- Original width of the image
    height -- Original height of the image
    '''
    
    original_height, original_width,_ = image.shape

    image = tf.image.resize(image, target_size, method='nearest')    # Resizes the image    
    image /= 255  # Normalization
    image = np.expand_dims(image, 0)    # To make the image a batch of images with a single image. Output shape=(batch_size, height, width, channels)

    return image, original_width, original_height

In [118]:
# filename = 'image1.jpg'
#img = cv2.imread(filename)
image_array, original_width, original_height = preprocess_image(frame)

In [3]:
'''decode_netout() will take each one of the NumPy arrays, one at a time, and
decode the candidate bounding boxes and class predictions. Further, any bounding
boxes that don’t confidently describe an object (e.g. all class probabilities
are below a threshold) are ignored. We will use a probability of 60% or 0.6. The
function returns a list of BoundBox instances that define the corners of each 
bounding box in the context of the input image shape and class probabilities.'''

'''def _sigmoid(x):
	return 1. / (1. + np.exp(-x))
 '''

class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
        return self.score
        

def decode_netout(netout, anchors, obj_threshold, net_w=416, net_h=416):
    grid_h, grid_w = netout.shape[:2] # Number of Grid divisions
    nb_box = 3 # Number of anchor boxes
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)) # Convert (13,13,225) to (13,13,3,85)
    nb_classes = netout.shape[-1] - 5 # 5 dedected due to pc, bx, by, bh, bw
    boxes = []

    netout[..., :2] = tf.math.sigmoid(netout[..., :2])
    netout[..., 4:] = tf.math.sigmoid(netout[..., 4:])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_threshold

    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w

        for b in range(nb_box):
            objectness = netout[int(row)][int(col)][b][4]
            if objectness.all() <= obj_threshold:
                continue
            x, y, w, h = netout[int(row)][int(col)][b][:4]
            x = (col + x) / grid_w
            y = (row + y) / grid_h
            w = anchors[2 * b + 0] * np.exp(w) / net_w
            h = anchors[2 * b + 1] * np.exp(h) / net_h

            classes = netout[int(row)][col][b][5:]
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)
    return boxes

In [120]:
yhat = model.predict(image_array)

In [4]:
# Define the anchor boxes
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
model_input_shape = (416, 416)

# Define the probability threshold for detected objects
class_threshold = 0.6


'''We need a list of strings containing the class labels known to the model
 in the correct order used during training, specifically those class labels
  from the MSCOCO dataset. Thankfully, this is provided in the experiencor
  script.'''

# define the labels
labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

In [122]:
boxes = list()
for i in range(len(yhat)):
    boxes += decode_netout(yhat[i][0],
                            anchors[i],
                            class_threshold,
                            model_input_shape[0],
                            model_input_shape[1]
                            )
print(boxes[0].xmin, boxes[10].ymax, len(boxes))

-0.20888684174189198 0.29292368478852615 10647


In [123]:
count = 0
for i in range(10647):
    if boxes[i].objness > 0.5:
        count += 1
print(count)

7


In [5]:
# To rescale the bboxes to fit on the objects of the original image

def rescale_yolo_boxes(boxes, image_w, image_h, net_h, net_w):
    new_w, new_h = net_w, net_h
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)

In [125]:
rescale_yolo_boxes(boxes, original_width, original_height, model_input_shape[0], model_input_shape[1])

print(boxes[0].xmin, boxes[10].ymax)

-133 140


In [6]:
def _interval_overlap(interval_a, interval_b):
	x1, x2 = interval_a
	x3, x4 = interval_b
	if x3 < x1:
		if x4 < x1:
			return 0
		else:
			return min(x2,x4) - x1
	else:
		if x2 < x3:
			 return 0
		else:
			return min(x2,x4) - x3
 
def bbox_iou(box1, box2):
	intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
	intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
	intersect = intersect_w * intersect_h
	w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
	w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
	union = w1*h1 + w2*h2 - intersect
	return float(intersect) / union

def do_nms(boxes, nms_threshold):
    if len(boxes) > 0:
        nb_classes = len(boxes[0].classes)
    else:
        return

    for c in range(nb_classes):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0:
                continue
            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold:
                    boxes[index_j].classes[c] = 0

In [127]:
do_nms(boxes, 0.5)
print(len(boxes),boxes[0].xmin, boxes[10].ymax)

10647 -133 140


In [128]:
count = 0
for i in range(10647):
    if boxes[i].objness > 0.5:
        count += 1
print(count)

7


In [7]:
def filter_boxes(boxes, labels, threshold=0.6):
    v_boxes, v_labels, v_scores = list(), list(), list()
    
    for box in boxes:
        for i in range(len(labels)):
            if box.classes[i] > threshold:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
    return v_boxes, v_labels, v_scores

In [130]:
v_boxes, v_labels, v_scores = filter_boxes(boxes, labels, class_threshold)


# summarize what we found
for i in range(len(v_boxes)):
	print(v_labels[i], v_scores[i])

person 98.77431988716125


In [131]:
import cv2

for i in range(len(v_boxes)):
    box = v_boxes[i]
    y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 1)

In [132]:
cv2.imshow('Image', frame)    # Read Image

k = cv2.waitKey(0)
if k == 27:
    cv2.destroyAllWindows()

In [34]:
import cv2
cap = cv2.VideoCapture(0)
count = 0
while cap.isOpened():
    ret, frame = cap.read()

    img_array, original_width, original_height = preprocess_image(frame)
    yhat = model.predict(img_array)
    
    boxes = list()
    for i in range(len(yhat)):
        boxes += decode_netout(yhat[i][0],
                               anchors[i],
                               class_threshold,
                               model_input_shape[0],
                               model_input_shape[1]
                            )

    rescale_yolo_boxes(boxes, original_width, original_height, model_input_shape[0], model_input_shape[1])

    do_nms(boxes, 0.5)

    v_boxes, v_labels, v_scores = filter_boxes(boxes, labels, class_threshold)

    # summarize what we found
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        xmin, ymin, xmax, ymax = box.xmin, box.ymin, box.xmax, box.ymax
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0,255,0), 1)
        print(v_labels[i], v_scores[i])

    cv2.imshow('Video', frame)
    count += 1
    k = cv2.waitKey(1)
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()

## Time taken

|After Fn call       | Frames processed/10sec|
|--------------------|---------------|
|yhat:               |72frames(10.9s)|
|Decode_netout:      |17frames(10.6s)|
|rescale_yolo_boxes: |15frames(10.5s)|
|do_nms:             |03frames(11.9s)|
|filter boxes:       |02frames(11.6s)|
|final:              |02frames(11.7s)|

# References:
1. https://www.youtube.com/watch?v=NYT1KFE1X2o&ab_channel=VikasJha
2. https://github.com/experiencor/keras-yolo3
3. https://machinelearningmastery.com/how-to-perform-object-detection-with-yolov3-in-keras/
4. https://www.tensorflow.org/install/gpu