In [None]:
# Create a Flask application and integrate a YOLO object detection model into it.

# The video source and the coordinates of the regions are supplied through an API call to Flask.
# Two regions are needed: one in which persons must be detected, and another in which only cars or two-wheelers must be detected.
# The left side of the video is used for person detection and the right side for vehicle detection.

In [None]:
import cv2
import warnings
import torch
from flask import Flask, request, jsonify, Response

# Suppress FutureWarning messages; the filter is installed before the model is loaded below.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Class indices used to filter YOLO detections.
# Mapping as noted by the author: 0 -> person, 2 -> car, 3 -> two wheelers.
# NOTE(review): in the COCO label set index 3 is normally 'motorcycle' (bicycle is 1) -
# confirm against model.names if bicycles are also meant to count as two wheelers.
PERSON_CLASS = 0
VEHICLE_CLASSES = [2, 3]  # Car and Two Wheelers

# Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Pretrained YOLOv5-small model, fetched (or read from the local cache) through torch.hub.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

def detect_objects(frame, model, region, classes_to_detect):
    """Run the detector on one rectangular region of ``frame`` and draw the matches.

    Args:
        frame: Image array indexed as ``frame[y, x]`` (OpenCV layout). It is
            annotated in place.
        model: Callable detector. Its result must expose ``xyxy[0]`` with rows of
            ``(xmin, ymin, xmax, ymax, conf, cls)`` and the model must provide a
            ``names`` lookup from class index to label.
        region: ``[x1, y1, x2, y2]`` rectangle in frame pixel coordinates. Values
            are truncated to ``int`` and clamped to the frame bounds.
        classes_to_detect: Container of class indices that should be drawn.

    Returns:
        The same ``frame`` object, with a green box and a ``label confidence``
        caption drawn for every detection whose class is in ``classes_to_detect``.
        If the clamped region is empty the frame is returned untouched and the
        model is not invoked.
    """
    frame_height, frame_width = frame.shape[:2]

    # JSON clients may send floats, but slice bounds must be ints. Clamping also
    # stops negative values from wrapping around to the far edge of the frame.
    x1, y1, x2, y2 = (int(value) for value in region)
    x1, x2 = (min(max(x, 0), frame_width) for x in (x1, x2))
    y1, y2 = (min(max(y, 0), frame_height) for y in (y1, y2))

    # Nothing to analyse: avoid handing a zero-sized image to the model.
    if x2 <= x1 or y2 <= y1:
        return frame

    sub_frame = frame[y1:y2, x1:x2]  # Crop the region

    # OpenCV delivers BGR, while the YOLOv5 hub wrapper interprets numpy input as
    # RGB, so reverse the channel axis for colour images before inference.
    model_input = sub_frame[..., ::-1] if sub_frame.ndim == 3 else sub_frame
    results = model(model_input)

    # Keep only the requested classes and draw them on the full frame,
    # shifting crop-relative coordinates back by the region offset.
    detected = results.xyxy[0].cpu().numpy()
    for xmin, ymin, xmax, ymax, conf, cls in detected:
        class_id = int(cls)
        if class_id not in classes_to_detect:
            continue
        top_left = (int(xmin) + x1, int(ymin) + y1)
        bottom_right = (int(xmax) + x1, int(ymax) + y1)
        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
        label = model.names[class_id]
        cv2.putText(frame, f'{label} {conf:.2f}', (top_left[0], top_left[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return frame

def process_video(video_source, left_region, right_region):
    """Stream annotated frames of a video as multipart JPEG chunks.

    Each frame read from ``video_source`` is passed through ``detect_objects``
    twice: once on ``left_region`` for ``PERSON_CLASS`` and once on
    ``right_region`` for ``VEHICLE_CLASSES``. The annotated frame is then
    JPEG-encoded and yielded as one ``--frame`` part.

    Args:
        video_source: Anything accepted by ``cv2.VideoCapture``.
        left_region: ``[x1, y1, x2, y2]`` rectangle searched for persons.
        right_region: ``[x1, y1, x2, y2]`` rectangle searched for vehicles.

    Yields:
        ``bytes`` chunks of the form
        ``--frame\\r\\nContent-Type: image/jpeg\\r\\n\\r\\n<jpeg>\\r\\n``.
    """
    cap = cv2.VideoCapture(video_source)

    # try/finally guarantees the capture is released even when the consumer stops
    # iterating early (e.g. the HTTP client disconnects and the generator is closed).
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Persons in the left region, vehicles in the right region.
            frame = detect_objects(frame, model, left_region, [PERSON_CLASS])
            frame = detect_objects(frame, model, right_region, VEHICLE_CLASSES)

            # Encode the frame to JPEG; skip frames that fail to encode rather
            # than emitting a part with an invalid payload.
            encoded, buffer = cv2.imencode('.jpg', frame)
            if not encoded:
                continue
            frame_bytes = buffer.tobytes()

            # Yield the frame as one part of the multipart HTTP response
            yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + frame_bytes + b'\r\n')
    finally:
        cap.release()

def _parse_region(value):
    """Return ``value`` as ``[x1, y1, x2, y2]`` ints, or ``None`` if it is not a usable region."""
    if not isinstance(value, (list, tuple)) or len(value) != 4:
        return None
    coords = []
    for item in value:
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(item, bool) or not isinstance(item, (int, float)):
            return None
        try:
            coords.append(int(item))
        except (ValueError, OverflowError):  # NaN / infinity
            return None
    x1, y1, x2, y2 = coords
    if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
        return None
    return coords


@app.route('/detect', methods=['POST'])
def detect():
    """Start a detection stream for the video and regions given in the JSON body.

    Expected JSON object:
        ``video_source``: path/URL string (or integer device index) for the video.
        ``left_region``:  ``[x1, y1, x2, y2]`` region used for person detection.
        ``right_region``: ``[x1, y1, x2, y2]`` region used for vehicle detection.

    Returns:
        A streaming ``multipart/x-mixed-replace`` response of annotated JPEG
        frames, or a JSON error with HTTP 400 when the request is malformed.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON body,
    # so every malformed request takes the same 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify(error='Request body must be a JSON object.'), 400

    missing = [key for key in ('video_source', 'left_region', 'right_region') if key not in data]
    if missing:
        return jsonify(error=f"Missing required field(s): {', '.join(missing)}"), 400

    # NOTE(review): video_source comes straight from the client and is handed to
    # cv2.VideoCapture, so a caller can make the server open arbitrary local paths
    # or URLs. Restrict it to an allow-list before exposing this service.
    video_source = data['video_source']
    if isinstance(video_source, bool) or not isinstance(video_source, (str, int)) or video_source == '':
        return jsonify(error='video_source must be a non-empty string or an integer device index.'), 400

    left_region = _parse_region(data['left_region'])  # Format: [x1, y1, x2, y2]
    right_region = _parse_region(data['right_region'])  # Format: [x1, y1, x2, y2]
    if left_region is None or right_region is None:
        return jsonify(error='left_region and right_region must each be [x1, y1, x2, y2] with non-negative numbers, x2 > x1 and y2 > y1.'), 400

    return Response(process_video(video_source, left_region, right_region), mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/')
def index():
    """Root endpoint: report that the service is up and point callers at /detect."""
    status_message = "YOLO Flask API is running. Use /detect endpoint for object detection."
    return status_message

if __name__ == '__main__':
    # Start Flask's built-in server on port 5000, bound to every network interface.
    # Note: this call blocks the notebook kernel until the server is stopped.
    app.run(port=5000, host='0.0.0.0')

Using cache found in C:\Users\Kali/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-14 Python-3.11.5 torch-2.4.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.41.95:5000
Press CTRL+C to quit
127.0.0.1 - - [17/Oct/2024 18:41:06] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [17/Oct/2024 18:41:06] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [17/Oct/2024 18:42:43] "POST /detect HTTP/1.1" 200 -


In [6]:
# Request detection on both halves of the 3840x2160 video: left half for persons, right half for vehicles.
# Note: /detect answers with a multipart/x-mixed-replace stream of JPEG frames, so the saved
# file is a raw multipart JPEG stream and not a real MP4 container, despite its extension.
!curl -X POST -H "Content-Type: application/json" -d "{\"video_source\": \"C:\\\\Users\\\\Kali\\\\Downloads\\\\WhatsApp Video Caliber.mp4\", \"left_region\": [0, 0, 1920, 2160], \"right_region\": [1920, 0, 3840, 2160]}" http://127.0.0.1:5000/detect --output "C:\Users\Kali\Downloads\Caliber_Output\output_video.mp4"

In [7]:
import cv2

# Open the sample video and report its frame size.
video_source = r"C:\Users\Kali\Downloads\WhatsApp Video Caliber.mp4"
cap = cv2.VideoCapture(video_source)

if cap.isOpened():
    # Capture properties come back as floats, hence the int() conversion.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Video Resolution: {width}x{height} pixels')
else:
    print("NOt Opened")

# Release the handle whether or not the video could be opened.
cap.release()

Video Resolution: 3840x2160 pixels
