<a href="https://colab.research.google.com/github/sarang-manohar/hogwarts/blob/main/academia/sig788/code/task_4_2_P_Sarang_Manohar(s223504903)_Part2v1_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2, requests, json, io, time
import numpy as np
from PIL import Image, ImageDraw, ImageFont


In [None]:
# Open the input video; later cells read its properties and frames through `cap`.
VIDEO_PATH = 'sig788task4.2.mp4'
cap = cv2.VideoCapture(VIDEO_PATH)

# cv2.VideoCapture does not raise when the file is missing or cannot be decoded;
# it just returns a handle for which isOpened() is False. Fail here with a clear
# message instead of letting the rest of the notebook run on an unopened stream.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {VIDEO_PATH}")

In [None]:
# Report the basic properties of the opened video, one "label value" line each.
video_properties = (
    ("Width :", cv2.CAP_PROP_FRAME_WIDTH),
    ("Height :", cv2.CAP_PROP_FRAME_HEIGHT),
    ("FPS :", cv2.CAP_PROP_FPS),
    ("Frame Count :", cv2.CAP_PROP_FRAME_COUNT),
)
for property_label, property_id in video_properties:
    print(property_label, cap.get(property_id))

Width : 640.0
Height : 360.0
FPS : 10.0
Frame Count : 600.0


In [None]:
# Read the credentials file (JSON) that holds the endpoint, prediction key
# and request content type.
with open('creds.txt') as f:
    creds = json.load(f)

# API endpoint and prediction key
ENDPOINT = creds["ENDPOINT"]
KEY = creds['prediction_key']

# Predictions with a probability below this value are not drawn
THRESHOLD = 0.25

# Font for drawing the tag labels. "arial.ttf" is not installed on every
# system; ImageFont.truetype raises OSError when the file cannot be read, so
# fall back to Pillow's built-in default font instead of aborting the notebook.
try:
    FONT = ImageFont.truetype("arial.ttf", 16)
except OSError:
    FONT = ImageFont.load_default()

# Colour used for each tag's box and label
TAG_COLORS = {
    "person": (253, 1, 0),
    "bicycle": (247, 105, 21),
    "motorcycle": (238, 222, 4),
    "bus": (47, 162, 54),
    "car": (51, 62, 212)
}

# Output video writer, configured to match the input video.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating it to int would change the
# playback speed of the output for fractional rates such as 29.97.
frame_rate = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter("object_tracking.mp4", fourcc, frame_rate, (width, height))

In [None]:
# Define the draw_boxes function
def draw_boxes(image, predictions, tag_colors):
    """Draw a labelled bounding box on ``image`` for every confident prediction.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate. It is drawn on in place.
    predictions : iterable of dict
        Each item must provide ``"tagName"``, ``"probability"`` and a
        ``"boundingBox"`` dict with ``"left"``, ``"top"``, ``"width"`` and
        ``"height"``. The box values are multiplied by the image dimensions,
        i.e. they are treated as fractions of the image size.
    tag_colors : dict
        Maps a tag name to the colour used for its box and label; tags that
        are missing from the mapping are drawn in ``"red"``.

    Returns
    -------
    PIL.Image.Image
        The same ``image`` object, after drawing.

    Predictions whose probability is below the module-level ``THRESHOLD`` are
    skipped. Labels are rendered with the module-level ``FONT``.
    """
    # Scale by the size of the image actually passed in rather than by
    # module-level globals, so the boxes are correct for any image.
    img_width, img_height = image.size
    draw = ImageDraw.Draw(image)
    for prediction in predictions:
        tag = prediction["tagName"]
        probability = prediction["probability"]
        if probability < THRESHOLD:
            continue

        color = tag_colors.get(tag, "red")
        box = prediction["boundingBox"]
        left = int(box["left"] * img_width)
        top = int(box["top"] * img_height)
        right = int((box["left"] + box["width"]) * img_width)
        bottom = int((box["top"] + box["height"]) * img_height)
        draw.rectangle([(left, top), (right, bottom)], outline=color, width=2)

        # The label normally sits 20 px above the box; clamp it so that boxes
        # near the top edge do not get their label drawn off the image.
        label_y = max(top - 20, 0)
        draw.text((left, label_y), f"{tag} {probability:.2f}", font=FONT, fill=color)
    return image

In [None]:
# Pause after each processed frame.
# NOTE(review): presumably this throttles calls to the prediction API — confirm
# the intended rate; at 11 s per frame a long video takes a long time.
SECONDS_BETWEEN_FRAMES = 11

# Upper bound on how long a single prediction request may take, so a stalled
# connection raises instead of hanging the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# The request headers are identical for every frame, so build them once.
headers = {
    "Prediction-Key": KEY,
    "Content-Type": creds['content_type']
}

# try/finally guarantees the capture, the writer and any preview window are
# released even if a request fails part-way through; otherwise the output
# video file would be left unfinalised.
try:
    # Loop through the frames
    while True:
        # Read the next frame from the video file (OpenCV returns BGR)
        ret, frame_bgr = cap.read()
        if not ret:
            break

        # Compress the frame as a JPEG image. cv2.imencode expects BGR input,
        # so encode the frame as read; encoding an RGB-converted frame would
        # send an image with red and blue swapped to the model.
        encoded_ok, encoded_frame = cv2.imencode(
            ".jpg", frame_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 80]
        )
        if not encoded_ok:
            raise RuntimeError("Failed to JPEG-encode video frame")

        # Make a prediction using the Custom Vision API. Only the JPEG bytes
        # are sent: requests ignores `json=` whenever `data=` is supplied, so
        # the previous per-frame `frame.tolist()` payload was pure overhead.
        response = requests.post(
            ENDPOINT,
            headers=headers,
            data=encoded_frame.tobytes(),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        predictions = response.json()["predictions"]

        # Draw the bounding boxes and labels. PIL works in RGB, so convert for
        # drawing and convert back to BGR for OpenCV display and writing.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        annotated_image = draw_boxes(Image.fromarray(frame_rgb), predictions, TAG_COLORS)
        frame_with_boxes = cv2.cvtColor(np.asarray(annotated_image), cv2.COLOR_RGB2BGR)

        # Display the annotated frame
        cv2.imshow("Frame", frame_with_boxes)

        # Write the annotated frame to the output video file
        video_writer.write(frame_with_boxes)

        cv2.waitKey(1)
        time.sleep(SECONDS_BETWEEN_FRAMES)
finally:
    # Release the video reader and writer
    cap.release()
    video_writer.release()

    cv2.destroyAllWindows()
