# Video Monitoring

In [3]:
import cv2
import numpy as np
import time
import sys
import os
import torchvision
import torch
import pandas as pd
from torchvision import transforms
from PIL import Image
import os

In [13]:
def _yolo_output_layer_names(net):
    """Return the names of the network's unconnected (output) layers."""
    layer_names = net.getLayerNames()
    # Depending on the OpenCV build, getUnconnectedOutLayers() yields either
    # nested one-element arrays or plain integers; flattening handles both.
    out_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(i) - 1] for i in out_ids]


def _yolo_detect(net, ln, image, min_confidence):
    """Run one forward pass on `image` and collect the detections whose best
    class score exceeds `min_confidence`.

    Returns three parallel lists: boxes as [x, y, width, height] in pixels,
    confidences as floats, and integer class ids.
    """
    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(ln)

    # the first four values of a detection are multiplied by the image size
    # to obtain pixel coordinates
    scale = np.array([w, h, w, h])
    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > min_confidence:
                # the box is given as center (x, y) followed by width and
                # height; convert it to a top-left corner plus size
                centerX, centerY, width, height = (detection[:4] * scale).astype("int")
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    return boxes, confidences, class_ids


def _yolo_annotate(image, keep, boxes, confidences, class_ids, labels, colors,
                   font_scale, thickness):
    """Draw the detections selected by `keep` onto `image`.

    Returns the annotated image (a new array once any label background has
    been blended in) and the list of label names that were drawn.
    """
    drawn_labels = []
    for i in keep:
        i = int(i)
        x, y, box_w, box_h = boxes[i]
        color = [int(c) for c in colors[class_ids[i]]]
        cv2.rectangle(image, (x, y), (x + box_w, y + box_h), color=color, thickness=thickness)
        text = str(labels[class_ids[i]]) + ":" + str(confidences[i])
        drawn_labels.append(labels[class_ids[i]])

        # measure the text so a filled box can be drawn behind it
        (text_width, text_height) = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
        text_offset_x = x
        text_offset_y = y - 5
        box_coords = ((text_offset_x, text_offset_y),
                      (text_offset_x + text_width + 2, text_offset_y - text_height))
        overlay = image.copy()
        cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
        # blend the filled box into the frame so it appears semi-transparent
        image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
        # now put the text (label:confidence)
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
    return image, drawn_labels


def yolov3_model(row):
    """
    Run YOLOv3 object detection over every frame of one video.

    Each processed frame is saved as a PNG under
    obj_detection/image_slices/<video name>/, and an annotated copy of the
    video is written to obj_detection/video_outputs/<video name>_output.avi.

    row: a pandas row (or any sequence) whose first element is the video file
         name inside obj_detection/videos/; a plain file-name string is
         accepted as well.

    Returns a tuple (out_filename, images, classes):
        out_filename - path of the annotated output video
        images       - numpy array with the path of every saved frame
        classes      - list with one entry per saved frame, each a list of
                       the label names kept after non-maximum suppression

    Raises IOError if no frame can be read from the video.
    """
    CONFIDENCE = 0.5
    SCORE_THRESHOLD = 0.5
    IOU_THRESHOLD = 0.5
    config_path = "obj_detection/cfg/yolov3.cfg"
    weights_path = "obj_detection/weights/yolov3.weights"
    font_scale = 1
    thickness = 1
    with open("obj_detection/data/coco.names") as names_file:
        labels = names_file.read().strip().split("\n")
    colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

    net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
    ln = _yolo_output_layer_names(net)

    if isinstance(row, str):
        video_file = row
    elif hasattr(row, "iloc"):
        # explicit positional access; row[0] on a labelled Series relies on a
        # deprecated fallback
        video_file = row.iloc[0]
    else:
        video_file = row[0]
    video_stem = os.path.splitext(video_file)[0]
    video_filename = os.path.join("obj_detection/videos/", video_file)
    directory = os.path.join('obj_detection/', 'image_slices/', video_stem)
    out_filename = os.path.join('obj_detection/video_outputs/', video_stem + "_output.avi")

    cap = cv2.VideoCapture(video_filename)
    out = None
    frame_paths = []
    classes = []
    try:
        # NOTE: this probing read is only used to learn the frame size; the
        # frame it consumes is neither saved nor annotated, so numbering
        # below starts with the second frame of the video.
        success, image = cap.read()
        if not success or image is None:
            raise IOError("Could not read any frame from %s" % video_filename)
        os.makedirs(directory, exist_ok=True)
        h, w = image.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        out = cv2.VideoWriter(out_filename, fourcc, 20.0, (w, h))

        count = -1
        while True:
            count += 1
            success, image = cap.read()
            if not success:
                break
            # save the untouched frame before anything is drawn on it
            output_image_path = os.path.join(directory, '%d.png' % count)
            cv2.imwrite(output_image_path, image)
            frame_paths.append(output_image_path)

            boxes, confidences, class_ids = _yolo_detect(net, ln, image, CONFIDENCE)

            # perform the non maximum suppression given the scores defined before
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
            # NMSBoxes may hand back an empty tuple when nothing is kept
            keep = np.array(idxs, dtype=int).flatten()

            image, c_e = _yolo_annotate(image, keep, boxes, confidences, class_ids,
                                        labels, colors, font_scale, thickness)
            classes.append(c_e)
            out.write(image)

            # stop early when "q" is reported as the pressed key
            if ord("q") == cv2.waitKey(1):
                break
    finally:
        # release the reader and the writer even when processing fails, so
        # the output file is closed properly
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    return out_filename, np.array(frame_paths), classes

In [5]:
# o_name, images, classes = yolov3_model("steph3.mp4")

In [48]:
def resnet_vehicle_model(row):
    """
    ResNet101 for image classification on vehicles

    row: mapping/row whose 'image_slices' entry is the path of the image to
         classify.

    Returns a tuple (indices, percentages, top_percentage):
        indices        - class indices sorted from highest to lowest score
        percentages    - softmax score of every class, scaled to 0-100
        top_percentage - the score (0-100) of the highest ranked class
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )])

    # Build the network once and keep it on the function object, so applying
    # this function row by row does not reload the weights for every image.
    resnet = getattr(resnet_vehicle_model, "_resnet", None)
    if resnet is None:
        resnet = torchvision.models.resnet101(pretrained=True)
        resnet.eval()
        resnet_vehicle_model._resnet = resnet

    # The parameter is called `row`; the image path is read from it.
    with Image.open(row['image_slices']) as img_file:
        # Normalize above is configured for three channels
        img_t = transform(img_file.convert("RGB"))
    batch_t = torch.unsqueeze(img_t, 0)

    # inference only: no gradients are needed
    with torch.no_grad():
        out = resnet(batch_t)
    _, indices = torch.sort(out, descending=True)
    percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
    p_2 = percentage.numpy()
    top_index = int(indices[0][0])
    return indices.numpy()[0], p_2, p_2[top_index]


In [54]:
def resnet_people_model(img):
    """
    ResNet101 for image classification on people

    img: mapping/row whose 'image_slices' entry is the path of the image to
         classify.

    Returns a tuple (indices, percentages, prediction):
        indices     - class indices sorted from highest to lowest score
        percentages - softmax score of every class, scaled to 0-100
        prediction  - entry of the module-level `classes` list for the
                      highest ranked index
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )])

    # Build the network once and keep it on the function object, so applying
    # this function row by row does not reload the weights for every image.
    resnet = getattr(resnet_people_model, "_resnet", None)
    if resnet is None:
        resnet = torchvision.models.resnet101(pretrained=True)
        resnet.eval()
        resnet_people_model._resnet = resnet

    with Image.open(img['image_slices']) as img_file:
        # Normalize above is configured for three channels
        img_t = transform(img_file.convert("RGB"))
    batch_t = torch.unsqueeze(img_t, 0)

    # inference only: no gradients are needed
    with torch.no_grad():
        out = resnet(batch_t)
    _, indices = torch.sort(out, descending=True)
    percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
    p_2 = percentage.numpy()
    top_index = int(indices[0][0])
    return indices.numpy()[0], p_2, classes[top_index]


In [34]:
def expand_images(row):
    """
    Function to get correct image

    row: pandas Series whose second field holds a sequence of image paths.

    Returns the element of that sequence located at position `row.name`
    (the row's index label).
    """
    # .iloc makes the positional lookup explicit; plain row[1] on a Series
    # with string labels depends on a deprecated positional fallback.
    return row.iloc[1][row.name]

In [45]:
def person_mask(row):
    """
    Tell whether a row's detections include a person.

    Returns True when "person" is found in row['classes'], False otherwise.
    """
    detected = row['classes']
    return "person" in detected

In [57]:
# Labels treated as vehicles. These are the members of COCO's "vehicle"
# supercategory, in both the official spelling and the spelling commonly
# found in Darknet's coco.names. NOTE(review): confirm against the
# coco.names file this project actually loads.
_VEHICLE_LABELS = frozenset([
    "bicycle", "car", "motorbike", "motorcycle", "aeroplane", "airplane",
    "bus", "train", "truck", "boat",
])


def vehicle_mask(row):
    """
    Creating a mask for images with vehicles

    Returns True when row['classes'] contains the literal label "vehicle"
    (the original check, kept for compatibility) or any label listed in
    _VEHICLE_LABELS; False otherwise.
    """
    detected = row['classes']
    if "vehicle" in detected:
        return True
    return any(label in _VEHICLE_LABELS for label in detected)

In [9]:
# Creating the dataframe (only had 1 video so far; will test with more later)
# Label names, one per line, later looked up by resnet_people_model.
with open('imagenet_classes.txt') as f:
    classes = [line.strip() for line in f]

# Collect the name of every file found below the videos folder.
video_names = []
for root, dirs, files in os.walk("obj_detection/videos/"):
    video_names.extend(files)
x = np.array(video_names)
df = pd.DataFrame(data=x, columns=["videos"])
df

Unnamed: 0,videos
0,steph3.mp4


In [58]:
#Calling all the functions
# One row per video: annotated video path, array of saved frame paths and
# the per-frame lists of detected labels.
per_video = df.apply(yolov3_model, axis=1, result_type="expand") \
              .rename(columns={0: "out_file", 1: "image_slices", 2: "classes"})

# One row per frame, each carrying that frame's list of detected labels.
yolo_preds = per_video.explode('classes').reset_index(drop=True)

# Explode the frame paths video by video too. Both columns hold one entry
# per saved frame in the same order, so after renumbering they line up row
# for row. Looking the path up by the global row number would only be
# correct while there is a single video.
image_slices = per_video['image_slices'].explode() \
                                        .reset_index(drop=True) \
                                        .rename('image_slices')
final_preds = yolo_preds.drop('image_slices', axis=1) \
                        .join(image_slices)

p_mask = final_preds.apply(person_mask, axis=1)
v_mask = final_preds.apply(vehicle_mask, axis=1)
# Only frames in which a person was detected are classified with ResNet.
p_predictions = final_preds[p_mask].apply(resnet_people_model, axis=1, result_type="expand") \
                                .rename(columns={0: "resnet_indices", 1: "resnet_percentage", 2: "resnet_prediction"})
people_predictions = final_preds.join(p_predictions)
people_predictions

Unnamed: 0,out_file,classes,image_slices,resnet_indices,resnet_percentage,resnet_prediction
0,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/0.png,"[430, 890, 416, 602, 752, 702, 880, 733, 646, ...","[2.1241775e-07, 4.8890456e-07, 1.0058557e-05, ...",basketball
1,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/1.png,"[430, 890, 416, 602, 752, 702, 880, 733, 865, ...","[2.414549e-07, 4.7873175e-07, 1.8527493e-05, 3...",basketball
2,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/2.png,"[430, 890, 416, 602, 880, 752, 702, 646, 433, ...","[1.5643454e-07, 3.0740824e-07, 9.687183e-06, 2...",basketball
3,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/3.png,"[430, 890, 416, 602, 880, 615, 917, 865, 646, ...","[7.4505216e-08, 2.1469914e-07, 5.332123e-06, 1...",basketball
4,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/4.png,"[430, 890, 416, 602, 646, 880, 702, 865, 917, ...","[2.3167996e-07, 7.5728695e-07, 1.3656507e-05, ...",basketball
5,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/5.png,"[430, 890, 416, 602, 702, 880, 865, 646, 615, ...","[3.8719725e-07, 1.6263414e-06, 2.0617283e-05, ...",basketball
6,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/6.png,"[430, 890, 416, 646, 602, 702, 880, 615, 433, ...","[3.349366e-07, 8.441289e-07, 1.5179643e-05, 5....",basketball
7,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/7.png,"[430, 890, 416, 646, 602, 615, 880, 865, 702, ...","[2.4716104e-07, 7.417926e-07, 1.0703387e-05, 3...",basketball
8,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/8.png,"[430, 890, 416, 646, 602, 880, 615, 702, 433, ...","[4.1076146e-07, 1.0817357e-06, 2.3134307e-05, ...",basketball
9,obj_detection/video_outputs/steph3_output.avi,"[person, person, person, person, person, perso...",obj_detection/image_slices/steph3/9.png,"[430, 890, 416, 602, 880, 752, 702, 615, 646, ...","[8.697465e-07, 1.9054202e-06, 7.648115e-05, 1....",basketball
