In [3]:
import cv2
import numpy as np
import yaml
from yaml.loader import SafeLoader

In [7]:
# Read the dataset configuration and pull out the class names.
with open('data.yaml', mode='r') as yaml_file:
    data_yaml = yaml.load(yaml_file, Loader=SafeLoader)

# `labels` is indexed by predicted class id when the boxes are captioned.
labels = data_yaml['names']
print(labels)


['person', 'chair', 'car', 'dog', 'bottle', 'bird', 'cat', 'pottedplant', 'sheep', 'boat', 'aeroplane', 'tvmonitor', 'bicycle', 'sofa', 'motorbike', 'cow', 'diningtable', 'horse', 'train', 'bus']


In [10]:
# List the working directory to check that data.yaml, the Model4 folder and the test image are present.
ls

 Volume in drive C is Acer
 Volume Serial Number is 300F-3588

 Directory of C:\Users\Kavitha padala\Desktop\kavithaProjects\objectdetection\predictions

05/10/2024  08:16 PM    <DIR>          .
05/10/2024  07:47 PM    <DIR>          ..
05/10/2024  08:06 PM    <DIR>          .ipynb_checkpoints
05/10/2024  03:38 PM               424 data.yaml
05/10/2024  08:01 PM    <DIR>          Model4
05/10/2024  08:01 PM           443,698 summer-streets-2023.jpg
05/10/2024  08:16 PM             4,096 yolo_predictions.ipynb
               3 File(s)        448,218 bytes
               4 Dir(s)  168,845,606,912 bytes free


In [47]:
# Load the YOLO network from its ONNX export.
onnx_weights_path = 'Model4/weights/best.onnx'
yolo = cv2.dnn.readNetFromONNX(onnx_weights_path)

# Run inference with OpenCV's own DNN backend on the CPU.
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [48]:
# Load the image; `img` stays untouched and `image` is the copy that gets drawn on.
image_path = './summer-streets-2023.jpg'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None instead of
    # raising, so fail here with a clear message rather than on img.copy().
    raise FileNotFoundError(f'Could not read image: {image_path}')
image = img.copy()

row, col, d = image.shape

# Get the YOLO prediction from the image.
# Step 1: paste the image into the top-left corner of a black square canvas so the
# resize to the network input size does not distort the aspect ratio.
max_rc = max(row, col)
input_image = np.zeros((max_rc, max_rc, 3), dtype=np.uint8)
input_image[0:row, 0:col] = image

# Step 2: scale pixels to [0, 1], resize to the network input size, swap the
# B and R channels, and run a forward pass.
INPUT_WH_YOLO = 640
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WH_YOLO, INPUT_WH_YOLO), swapRB=True, crop=False)
yolo.setInput(blob)
preds = yolo.forward()  # detection or prediction from YOLO

In [49]:
# Output layout: 1 image in the batch, 25200 candidate boxes, 25 values per box
# (cx, cy, w, h, objectness confidence, then 20 class scores).
print(preds.shape)

(1, 25200, 25)


In [50]:
# Non Maximum Suppression
# Step 1: keep only detections whose objectness confidence and best class score
# both clear their thresholds.
CONF_THRESHOLD = 0.4          # minimum objectness confidence
CLASS_SCORE_THRESHOLD = 0.25  # minimum probability of the best class
NMS_SCORE_THRESHOLD = 0.25    # score threshold handed to cv2.dnn.NMSBoxes
NMS_IOU_THRESHOLD = 0.45      # overlap threshold handed to cv2.dnn.NMSBoxes

detections = preds[0]
boxes = []
confidences = []
classes = []

# ndarray.shape is ordered (height, width, ...). The padded input is square, so
# both factors are equal, but unpack in the right order anyway.
image_h, image_w = input_image.shape[:2]
x_factor = image_w/INPUT_WH_YOLO
y_factor = image_h/INPUT_WH_YOLO

# The loop variable is deliberately not called `row`, so the image height stored
# in `row` by the image-loading cell is not overwritten.
for detection in detections:
    confidence = detection[4]  # confidence that the box contains an object
    if confidence > CONF_THRESHOLD:
        class_scores = detection[5:]
        class_score = class_scores.max()   # highest probability among the classes
        class_id = class_scores.argmax()   # index at which that maximum occurs

        if class_score > CLASS_SCORE_THRESHOLD:
            cx, cy, w, h = detection[0:4]
            # Convert centre/size in network-input pixels to
            # left, top, width, height in padded-image pixels.
            left = int((cx - 0.5*w)*x_factor)
            top = int((cy - 0.5*h)*y_factor)
            width = int(w*x_factor)
            height = int(h*y_factor)

            box = np.array([left, top, width, height])

            # append values into the lists
            confidences.append(confidence)
            boxes.append(box)
            classes.append(class_id)


# Convert NumPy scalars/arrays to plain Python lists for cv2.dnn.NMSBoxes.
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# Step 2: NMS. Depending on the OpenCV version the result is an index array or,
# when nothing survives, an empty tuple (which has no .flatten()); wrapping it in
# np.array yields a flat integer index array in every case.
nms_result = cv2.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
index = np.array(nms_result, dtype=int).flatten()
        
        


In [51]:
# Number of boxes kept by non-maximum suppression.
len(index)

1

In [52]:
# Draw each box kept by NMS together with a "<class>: <confidence>%" caption.
LABEL_HEIGHT = 30  # height in pixels of the white strip behind the caption

for ind in index:
    # extract bounding box
    x, y, w, h = boxes_np[ind]
    bb_conf = int(confidences_np[ind]*100)
    classes_id = classes[ind]
    class_name = labels[classes_id]

    text = f'{class_name}: {bb_conf}%'

    # Put the caption above the box when there is room. For boxes within
    # LABEL_HEIGHT pixels of the top edge the strip would fall outside the image,
    # so draw it just inside the top of the box instead.
    label_top = y - LABEL_HEIGHT if y >= LABEL_HEIGHT else max(y, 0)
    label_bottom = label_top + LABEL_HEIGHT

    cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.rectangle(image, (x, label_top), (x+w, label_bottom), (255, 255, 255), -1)

    cv2.putText(image, text, (x, label_bottom-10), cv2.FONT_HERSHEY_PLAIN, 0.7, (0, 0, 0), 1)




In [53]:
# Show the untouched image next to the annotated copy, one window each.
for window_title, frame in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, frame)

# Block until a key is pressed, then close both windows.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [31]:
# Inspect the objectness confidences of the candidates collected before NMS.
confidences

[0.428214, 0.40786305, 0.44302794, 0.49571487, 0.42710614, 0.44320455]

In [32]:
# Inspect the candidate boxes collected before NMS, each as [left, top, width, height].
boxes

[array([544, 344,  72, 139]),
 array([543, 338,  70, 150]),
 array([537, 338,  79, 152]),
 array([537, 342,  79, 148]),
 array([542, 337,  74, 155]),
 array([544, 333,  64, 165])]

In [35]:
# Inspect the predicted class id of each candidate (an index into `labels`).
classes

[0, 0, 0, 0, 0, 0]