### Object detection on images

In [185]:
### import necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt

### model paths
coco_data_path = '/home/pallav/yolo/models/coco.names'
yolo_cfg_path = '/home/pallav/yolo/models/yolov3.cfg'
yolo_wts_path = '/home/pallav/yolo/models/yolov3.weights'

### image path
img_path = '/home/pallav/Downloads/02.jpg'


### read classnames from coco.names file
LABELS = open(coco_data_path).read().strip().split('\n')

## initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

### load the object detector
net = cv2.dnn.readNetFromDarknet(yolo_cfg_path, yolo_wts_path)
ln = net.getLayerNames()
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]

### read frame of video for object detection
frame = cv2.imread(img_path)
(H, W) = frame.shape[:2]

# YOLO object detector - obtain bounding boxes and probabilities
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
layerOutputs = net.forward(ln)

# initialize our lists of detected bounding boxes, confidences, and class ID
boxes = []
confidences = []
classIDs = []

# loop over each of the layer outputs
for output in layerOutputs:
    # loop over each of the detections
    for detection in output:
        # extract the class ID and confidence (i.e., probability)
        # of the current object detection
        scores = detection[5:]
        classID = np.argmax(scores)
        confidence = scores[classID]
        if confidence > 0.5:
            # scale the bounding box coordinates back relative to the size of the image
            # YOLO model returns the center (x, y)-coordinates of
            # the bounding box followed by the boxes' width and height
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            
            # use the center (x, y)-coordinates to derive the top and and left corner of the bounding box
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            
            # update our list of bounding box coordinates, confidences, and class IDs
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(classID)
            
# apply non-maxima suppression to suppress weak, overlapping bounding boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

#  ensure at least one detection exists
if len(idxs) > 0:
    # loop over the indexes 
    for i in idxs.flatten():
        # extract the bounding box coordinates
        (x, y) = (boxes[i][0], boxes[i][1])
        (w, h) = (boxes[i][2], boxes[i][3])
        # draw a bounding box rectangle and label on the frame
        color = [int(c) for c in COLORS[classIDs[i]]]
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
#         print the labels and confidence
#         print(video_name + f"_frame_{count}.jpg")
        print(f"{LABELS[classIDs[i]]} -> {str(confidences[i])}")
        cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

person -> 0.9963330626487732
book -> 0.7454584240913391
book -> 0.7102881073951721
book -> 0.6613765358924866
book -> 0.642610490322113
book -> 0.6321038007736206
book -> 0.6220565438270569
book -> 0.5905422568321228
book -> 0.588641881942749
book -> 0.5483890771865845
book -> 0.5480033159255981
book -> 0.5449872612953186
book -> 0.5108945369720459


### Object detection on videos

In [3]:
### import necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt

### model paths
coco_data_path = '/home/pallav/yolo/models/coco.names'
yolo_cfg_path = '/home/pallav/yolo/models/yolov3.cfg'
yolo_wts_path = '/home/pallav/yolo/models/yolov3.weights'

### read classnames from coco.names file
LABELS = open(coco_data_path).read().strip().split('\n')

## initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

### load the object detector
net = cv2.dnn.readNetFromDarknet(yolo_cfg_path, yolo_wts_path)
ln = net.getLayerNames()
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]

In [4]:
### image path
vid_dir_path = 'https://wkcdn.wakau.in/wakau/videoContent/'
vid_name = '8832-1663935392-1675159794.mp4'

cap = cv2.VideoCapture(vid_dir_path + vid_name)
(W, H) = (None, None)

# Find the total number of frames in the video file
total_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"Video Name = {vid_name}\tTotal frames = {total_frame}")

fps = cap.get(cv2.CAP_PROP_FPS)  # Gets the frames per second
multiplier = fps * 0.50
frame_counter = 1

vid_result = []
predictions = {}

while frame_counter <= total_frame:
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)
    (grabbed, frame) = cap.read()
    frame_no = frame_counter
    frame_counter += multiplier
    # if the frame was not grabbed, then we have reached the end of the stream
    if not grabbed:
        break
    # if the frame dimensions are empty, grab them
    if W is None or H is None:
        (H, W) = frame.shape[:2]
    
    # YOLO object detector - obtain bounding boxes and probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # initialize our lists of detected bounding boxes, confidences, and class ID
    boxes = []
    confidences = []
    classIDs = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections
        for detection in output:
            # extract the class ID and confidence (i.e., probability)
            # of the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            if confidence > 0.3:
                # scale the bounding box coordinates back relative to the size of the image
                # YOLO model returns the center (x, y)-coordinates of
                # the bounding box followed by the boxes' width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # use the center (x, y)-coordinates to derive the top and and left corner of the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # update our list of bounding box coordinates, confidences, and class IDs
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.3)


    print(vid_name + f"_frame_{frame_no}.jpg")

    #  ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes 
        for i in idxs.flatten():
            predictions[LABELS[classIDs[i]]] = round(confidences[0], 2)
            print(f"{LABELS[classIDs[i]]} -> {str(confidences[i])}")

#     ## change logic
#     frame_result.append([vid_name, f"frame_{int(frame_no)}", predictions])

Video Name = 8832-1663935392-1675159794.mp4	Total frames = 211
8832-1663935392-1675159794.mp4_frame_1.jpg
8832-1663935392-1675159794.mp4_frame_16.0.jpg
8832-1663935392-1675159794.mp4_frame_31.0.jpg
orange -> 0.42128127813339233
8832-1663935392-1675159794.mp4_frame_46.0.jpg
8832-1663935392-1675159794.mp4_frame_61.0.jpg
8832-1663935392-1675159794.mp4_frame_76.0.jpg
8832-1663935392-1675159794.mp4_frame_91.0.jpg
8832-1663935392-1675159794.mp4_frame_106.0.jpg
8832-1663935392-1675159794.mp4_frame_121.0.jpg
8832-1663935392-1675159794.mp4_frame_136.0.jpg
8832-1663935392-1675159794.mp4_frame_151.0.jpg
8832-1663935392-1675159794.mp4_frame_166.0.jpg
8832-1663935392-1675159794.mp4_frame_181.0.jpg
8832-1663935392-1675159794.mp4_frame_196.0.jpg


In [41]:
predictions = {'bowl': 0.4, 'diningtable': 0.9, 'vase': 0.6, 'person': 0.9}
print(f"Before: {predictions}")

label = 'diningtable'
confidence = 0.95
if label in predictions.keys():
    predictions[label] = max(predictions[label], confidence)
else:
    predictions[label] = confidence
    
print(f"\nAfter: {predictions}")

Before: {'bowl': 0.4, 'diningtable': 0.9, 'vase': 0.6, 'person': 0.9}

After: {'bowl': 0.4, 'diningtable': 0.95, 'vase': 0.6, 'person': 0.9}


In [1]:
### model paths
coco_data_path = '/home/pallav/yolo/models/coco.names'
yolo_cfg_path = '/home/pallav/yolo/models/yolov3.cfg'
yolo_wts_path = '/home/pallav/yolo/models/yolov3.weights'

### read classnames from coco.names file
LABELS = open(coco_data_path).read().strip().split('\n')
print(LABELS)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [21]:
master_label_dict = {
    'food' : ['banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake'],
    'automobiles' : ['bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat'],
    'sports_items' : ['frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket'],
    'animals' : ['bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe'],
    'household_items' : ['chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'],
    'kitchen_items' : ['microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl'],
    'other_items' : ['backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'fire hydrant', 'stop sign', 'parking meter', 'bench']
}

In [22]:
result = {'person': 0.983, 'hot dog': 0.992, 'diningtable': 0.854, 'bowl': 0.786, 'spoon': 0.581, 'oven': 0.954, 'sink': 0.555, 'bottle': 0.949}
master_list = []

In [23]:
# for i in result.keys():
#     if i in master_label_dict['food']:
#         master_list.append('food')
#     if i in master_label_dict['automobiles']:
#         master_list.append('automobiles')
#     if i in master_label_dict['sports_items']:
#         master_list.append('sports_items')
#     if i in master_label_dict['animals']:
#         master_list.append('animals')
#     if i in master_label_dict['household_items']:
#         master_list.append('household_items')
#     if i in master_label_dict['kitchen_items']:
#         master_list.append('kitchen_items')
#     if i in master_label_dict['other_items']:
#         master_list.append('other_items')
        
        
for label in result.keys():
    for i in master_label_dict.keys():
        if label in master_label_dict[i]:
            master_list.append(i)

In [25]:
list(set(master_list))

['household_items', 'kitchen_items', 'food']

In [29]:
list(set(master_list))

['household_items', 'kitchen_items', 'food']

In [1]:
import pandas as pd

data = pd.read_csv('/home/pallav/yolo/backend_content_details.csv', low_memory = False)
data = data[['_id', 'contentURL']]
data = data.drop_duplicates()
data = data.dropna()
print(f"Shape of data: {data.shape}")
print(f"Columns in data: {data.columns}")

Shape of data: (88230, 2)
Columns in data: Index(['_id', 'contentURL'], dtype='object')


In [9]:
for index, i in data.iterrows():
    print(index)
    print(i['contentURL'])
    print(i['_id'])
    break

0
https://wkcdn.wakau.in/wakau/videoContent/4262-1674247688-1675160133.mp4
63d8ea45706187554a72a723


In [18]:
for i in data.iterrows():
    print(i)
    break

(0, _id                                    63d8ea45706187554a72a723
contentURL    https://wkcdn.wakau.in/wakau/videoContent/4262...
Name: 0, dtype: object)
