In [None]:
# TEST WITH IMAGE 

In [22]:
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance

def draw_road_polygon_on_image(img_path, output_path, distance_threshold=400,
                               display_size=(800, 600)):
    """Annotate an image with human/screen masks and the gap between them.

    Two YOLO segmentation models are run on the image: one for humans and one
    for screens. For every human mask the closest pair of polygon vertices
    (human vertex, screen vertex) is located; a line and the pixel distance are
    drawn between them, and both masks are filled with a colour that depends on
    whether the distance is within ``distance_threshold``.

    Args:
        img_path: Path of the image to read and to run the models on.
        output_path: Path the annotated, resized image is written to.
        distance_threshold: Pixel distance (measured before resizing) up to and
            including which a human counts as being within range of a screen.
        display_size: ``(width, height)`` the annotated image is resized to.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
        IOError: If OpenCV reports that the output image was not written.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Load the models
    model_human = YOLO("best (HUM).pt")
    model_screen = YOLO("best_SCR.pt")

    # Perform prediction using the models (human first, then screen)
    human_results = model_human.predict(img_path)
    screen_results = model_screen.predict(img_path)

    screen_polygons = _result_polygons(screen_results)
    human_polygons = _result_polygons(human_results)

    for human_polygon in human_polygons:
        min_dist, human_point, screen_point, nearest_screen = _nearest_point_pair(
            human_polygon, screen_polygons)
        if nearest_screen is None:
            # No screen to measure against: leave this human unannotated.
            continue

        within_range = min_dist <= distance_threshold

        # Plain Python ints keep the OpenCV drawing calls independent of NumPy scalar types.
        start = tuple(int(value) for value in human_point)
        end = tuple(int(value) for value in screen_point)
        cv2.line(image, start, end, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the connecting line
        midpoint = ((start[0] + end[0]) // 2, (start[1] + end[1]) // 2)
        cv2.putText(image, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Colours are BGR. Human mask: green when within range, red otherwise.
        human_mask_color = (0, 255, 0) if within_range else (0, 0, 255)
        cv2.fillPoly(image, [human_polygon], color=human_mask_color)

        # Screen mask: blue when within range, pink otherwise.
        # Hot pink is RGB (255, 105, 180), i.e. (180, 105, 255) in OpenCV's BGR order.
        screen_color = (255, 0, 0) if within_range else (180, 105, 255)
        cv2.fillPoly(image, [nearest_screen], color=screen_color)

    # Resize the image for display
    resized_image = cv2.resize(image, display_size, interpolation=cv2.INTER_AREA)

    # Save the image; cv2.imwrite returns False when nothing was written.
    if not cv2.imwrite(output_path, resized_image):
        raise IOError(f"Could not write image: {output_path}")


def _result_polygons(results):
    """Turn the masks of the first prediction result into OpenCV polygons.

    Returns an empty list when there are no results or the first result has no
    masks. Segments without any points are skipped.
    """
    if not results or results[0].masks is None:
        return []
    polygons = []
    for mask_points in results[0].masks.xy:
        polygon = np.array(mask_points, np.int32).reshape((-1, 1, 2))
        if len(polygon):
            polygons.append(polygon)
    return polygons


def _nearest_point_pair(human_polygon, screen_polygons):
    """Find the closest vertex pair between a human polygon and any screen polygon.

    Args:
        human_polygon: Integer vertex array shaped ``(N, 1, 2)``.
        screen_polygons: Iterable of vertex arrays with the same layout.

    Returns:
        ``(min_dist, human_point, screen_point, screen_polygon)``. When there is
        nothing to compare, ``min_dist`` is ``inf`` and the rest are ``None``.
    """
    best = (float('inf'), None, None, None)
    human_points = human_polygon.reshape(-1, 2)
    if len(human_points) == 0:
        return best
    for screen_polygon in screen_polygons:
        screen_points = screen_polygon.reshape(-1, 2)
        if len(screen_points) == 0:
            continue
        # One vectorised call yields every vertex-to-vertex distance at once.
        pairwise = distance.cdist(human_points, screen_points)
        h_idx, s_idx = np.unravel_index(np.argmin(pairwise), pairwise.shape)
        candidate = float(pairwise[h_idx, s_idx])
        # Strict "<" keeps the earliest polygon when distances tie.
        if candidate < best[0]:
            best = (candidate, human_points[h_idx], screen_points[s_idx], screen_polygon)
    return best

# Example usage: annotate '3.jpg' and save the result as '3_OUTPUT.jpg'
draw_road_polygon_on_image(img_path='3.jpg', output_path='3_OUTPUT.jpg')




image 1/1 C:\Users\ASUS\Desktop\GAMA\3.jpg: 480x640 1 person, 1015.2ms
Speed: 2.0ms preprocess, 1015.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 C:\Users\ASUS\Desktop\GAMA\3.jpg: 480x640 1 onscreen, 1004.7ms
Speed: 3.0ms preprocess, 1004.7ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)


In [7]:
# TEST WITH VIDEO 

In [28]:
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance

def process_frame(frame, model_human, model_screen, distance_threshold=400,
                  display_size=(800, 600)):
    """Annotate one video frame with human/screen masks and their separation.

    Screen masks are split into "on" and "off" screens by the class name of
    each detected instance. Off screens are filled blue. For every human mask
    the closest vertex pair to any "on" screen is found; a line and the pixel
    distance are drawn, and the human mask and that screen mask are filled with
    colours that depend on ``distance_threshold``.

    Args:
        frame: BGR image array. It is drawn on in place.
        model_human: Model whose ``predict(frame)`` returns human segmentation results.
        model_screen: Model whose ``predict(frame)`` returns screen segmentation results.
        distance_threshold: Pixel distance (before resizing) below which a
            human counts as being within range of a screen.
        display_size: ``(width, height)`` of the returned frame.

    Returns:
        The annotated frame resized to ``display_size``.
    """
    # Perform prediction using the models (human first, then screen)
    human_results = model_human.predict(frame)
    screen_results = model_screen.predict(frame)

    # Split the screen masks into on-screen and off-screen polygons.
    screen_polygons = []
    off_screen_polygons = []
    if screen_results and screen_results[0].masks is not None:
        screen_result = screen_results[0]
        # `names` maps class id -> class name, so the class of each instance has
        # to be looked up through its id; testing `'on' in names` would only
        # compare against the integer keys and never match.
        for mask_points, class_id in zip(screen_result.masks.xy, screen_result.boxes.cls):
            polygon = np.array(mask_points, np.int32).reshape((-1, 1, 2))
            if len(polygon) == 0:
                continue
            class_name = str(screen_result.names[int(class_id)])
            # NOTE(review): assumes on-screen class names start with "on"
            # (the model logs show "onscreen") - confirm against the label set.
            if class_name.lower().startswith('on'):
                screen_polygons.append(polygon)
            else:
                off_screen_polygons.append(polygon)

    # Draw blue mask for off screen
    for off_screen_polygon in off_screen_polygons:
        cv2.fillPoly(frame, [off_screen_polygon], color=(255, 0, 0))

    # Collect polygons for model_human
    human_polygons = []
    if human_results and human_results[0].masks is not None:
        for mask_points in human_results[0].masks.xy:
            polygon = np.array(mask_points, np.int32).reshape((-1, 1, 2))
            if len(polygon):
                human_polygons.append(polygon)

    for human_polygon in human_polygons:
        min_dist, human_point, screen_point, nearest_screen = _nearest_point_pair(
            human_polygon, screen_polygons)
        if nearest_screen is None:
            # No on-screen polygon to measure against.
            continue

        within_range = min_dist < distance_threshold

        # Plain Python ints keep the OpenCV drawing calls independent of NumPy scalar types.
        start = tuple(int(value) for value in human_point)
        end = tuple(int(value) for value in screen_point)
        cv2.line(frame, start, end, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the connecting line
        midpoint = ((start[0] + end[0]) // 2, (start[1] + end[1]) // 2)
        cv2.putText(frame, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Colours are BGR. Human mask: red when within range, yellow otherwise.
        human_mask_color = (0, 0, 255) if within_range else (0, 255, 255)
        cv2.fillPoly(frame, [human_polygon], color=human_mask_color)

        # Screen mask: green when within range, violet otherwise.
        screen_color = (0, 255, 0) if within_range else (238, 130, 238)
        cv2.fillPoly(frame, [nearest_screen], color=screen_color)

    # Resize the image for display
    resized_frame = cv2.resize(frame, display_size, interpolation=cv2.INTER_AREA)

    return resized_frame


def _nearest_point_pair(human_polygon, screen_polygons):
    """Find the closest vertex pair between a human polygon and any screen polygon.

    Args:
        human_polygon: Integer vertex array shaped ``(N, 1, 2)``.
        screen_polygons: Iterable of vertex arrays with the same layout.

    Returns:
        ``(min_dist, human_point, screen_point, screen_polygon)``. When there is
        nothing to compare, ``min_dist`` is ``inf`` and the rest are ``None``.
    """
    best = (float('inf'), None, None, None)
    human_points = human_polygon.reshape(-1, 2)
    if len(human_points) == 0:
        return best
    for screen_polygon in screen_polygons:
        screen_points = screen_polygon.reshape(-1, 2)
        if len(screen_points) == 0:
            continue
        # One vectorised call yields every vertex-to-vertex distance at once.
        pairwise = distance.cdist(human_points, screen_points)
        h_idx, s_idx = np.unravel_index(np.argmin(pairwise), pairwise.shape)
        candidate = float(pairwise[h_idx, s_idx])
        # Strict "<" keeps the earliest polygon when distances tie.
        if candidate < best[0]:
            best = (candidate, human_points[h_idx], screen_points[s_idx], screen_polygon)
    return best

def process_video(input_path, output_path):
    """Run both segmentation models over a video and save the annotated frames.

    Every frame is passed through ``process_frame`` and appended to the output
    video. The writer is opened with the size of the first processed frame,
    because ``process_frame`` returns a resized frame and a video writer expects
    frames of exactly the size it was opened with.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the video to write. ``.mp4``/``.m4v``/``.mov``
            targets use the ``mp4v`` codec tag; anything else uses ``XVID``.

    Raises:
        IOError: If the input cannot be opened, yields no frames, or the
            output writer cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {input_path}")

        # Keep fractional frame rates (e.g. 29.97) and fall back to 30 when the
        # container reports none; "not fps > 0" also rejects NaN.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Pick a codec tag that matches the output container.
        if str(output_path).lower().endswith(('.mp4', '.m4v', '.mov')):
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        else:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec for .avi files

        # Load the models once; they are reused for every frame.
        model_human = YOLO("best (HUM).pt")
        model_screen = YOLO("best_SCR.pt")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process the frame
            processed_frame = process_frame(frame, model_human, model_screen)

            if out is None:
                # Open the writer lazily so its size matches the processed frames.
                frame_height, frame_width = processed_frame.shape[:2]
                out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
                if not out.isOpened():
                    raise IOError(f"Could not open video writer for: {output_path}")

            # Write the frame to the output video
            out.write(processed_frame)

        if out is None:
            raise IOError(f"No frames could be read from: {input_path}")
    finally:
        # Release resources even when processing fails part-way through.
        cap.release()
        if out is not None:
            out.release()

# Example usage: annotate 'testfile.mp4' and save the result as 'output_video.mp4'
process_video(input_path='testfile.mp4', output_path='output_video.mp4')



0: 384x640 (no detections), 892.4ms
Speed: 3.5ms preprocess, 892.4ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 onscreens, 898.9ms
Speed: 3.0ms preprocess, 898.9ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 958.2ms
Speed: 2.0ms preprocess, 958.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 onscreens, 929.9ms
Speed: 2.0ms preprocess, 929.9ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 894.3ms
Speed: 2.0ms preprocess, 894.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 onscreens, 871.5ms
Speed: 2.0ms preprocess, 871.5ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 872.6ms
Speed: 2.1ms preprocess, 872.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 onscreen, 869.8ms
Speed: 1.9ms preprocess, 869.8ms in

In [None]:
# GRADIO INTEGRATION

In [35]:
import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance
from io import BytesIO
import tempfile

def process_frame(frame, model_human, model_screen, near_threshold=400,
                  far_threshold=444, display_size=(800, 600)):
    """Annotate one video frame with the distance between humans and screens.

    For every human mask the closest vertex pair to any screen mask is found.
    A line and the pixel distance are drawn between the two points, the screen
    is outlined in violet when the distance exceeds ``far_threshold``, and the
    screen mask is filled green when the distance is below ``near_threshold``
    and violet otherwise.

    Args:
        frame: BGR image array. It is drawn on in place.
        model_human: Model whose ``predict(frame)`` returns human segmentation results.
        model_screen: Model whose ``predict(frame)`` returns screen segmentation results.
        near_threshold: Pixel distance (before resizing) below which the screen
            is filled green.
        far_threshold: Pixel distance above which the screen is also outlined.
        display_size: ``(width, height)`` of the returned frame.

    Returns:
        The annotated frame resized to ``display_size``.
    """
    # Perform prediction using the models (human first, then screen)
    human_results = model_human.predict(frame)
    screen_results = model_screen.predict(frame)

    # Without usable predictions the frame is only resized.
    if (not human_results or not screen_results
            or human_results[0] is None or screen_results[0] is None):
        return cv2.resize(frame, display_size, interpolation=cv2.INTER_AREA)

    screen_polygons = _result_polygons(screen_results)
    human_polygons = _result_polygons(human_results)

    for human_polygon in human_polygons:
        min_dist, human_point, screen_point, nearest_screen = _nearest_point_pair(
            human_polygon, screen_polygons)
        if nearest_screen is None:
            # No screen to measure against.
            continue

        # Plain Python ints keep the OpenCV drawing calls independent of NumPy scalar types.
        start = tuple(int(value) for value in human_point)
        end = tuple(int(value) for value in screen_point)
        cv2.line(frame, start, end, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the connecting line
        midpoint = ((start[0] + end[0]) // 2, (start[1] + end[1]) // 2)
        cv2.putText(frame, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        if min_dist > far_threshold:
            # Draw a violet polygon around the screen
            cv2.polylines(frame, [nearest_screen], isClosed=True, color=(238, 130, 238), thickness=5)

        # Fill the screen: green when near, violet otherwise.
        screen_color = (0, 255, 0) if min_dist < near_threshold else (238, 130, 238)
        cv2.fillPoly(frame, [nearest_screen], color=screen_color)

    # Resize the image for display
    resized_frame = cv2.resize(frame, display_size, interpolation=cv2.INTER_AREA)

    return resized_frame


def _result_polygons(results):
    """Turn the masks of the first prediction result into OpenCV polygons.

    Returns an empty list when there are no results or the first result has no
    masks. Segments without any points are skipped.
    """
    if not results or results[0].masks is None:
        return []
    polygons = []
    for mask_points in results[0].masks.xy:
        polygon = np.array(mask_points, np.int32).reshape((-1, 1, 2))
        if len(polygon):
            polygons.append(polygon)
    return polygons


def _nearest_point_pair(human_polygon, screen_polygons):
    """Find the closest vertex pair between a human polygon and any screen polygon.

    Args:
        human_polygon: Integer vertex array shaped ``(N, 1, 2)``.
        screen_polygons: Iterable of vertex arrays with the same layout.

    Returns:
        ``(min_dist, human_point, screen_point, screen_polygon)``. When there is
        nothing to compare, ``min_dist`` is ``inf`` and the rest are ``None``.
    """
    best = (float('inf'), None, None, None)
    human_points = human_polygon.reshape(-1, 2)
    if len(human_points) == 0:
        return best
    for screen_polygon in screen_polygons:
        screen_points = screen_polygon.reshape(-1, 2)
        if len(screen_points) == 0:
            continue
        # One vectorised call yields every vertex-to-vertex distance at once.
        pairwise = distance.cdist(human_points, screen_points)
        h_idx, s_idx = np.unravel_index(np.argmin(pairwise), pairwise.shape)
        candidate = float(pairwise[h_idx, s_idx])
        # Strict "<" keeps the earliest polygon when distances tie.
        if candidate < best[0]:
            best = (candidate, human_points[h_idx], screen_points[s_idx], screen_polygon)
    return best

def video_to_frames(video_file_path):
    """Annotate every frame of a video and write the result to a temporary .mp4.

    Each frame is passed through ``process_frame``. The temporary output file
    and its writer are created when the first processed frame is available, so
    the writer size matches the frames ``process_frame`` returns (it resizes
    them) and no file is left behind when the input has no frames.

    Args:
        video_file_path: Path of the video to read.

    Returns:
        Path of the temporary ``.mp4`` file holding the processed video. The
        file is not deleted automatically.

    Raises:
        IOError: If the input cannot be opened, yields no frames, or the
            output writer cannot be opened.
    """
    # Read the video file
    cap = cv2.VideoCapture(video_file_path)
    out = None
    output_path = None
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_file_path}")

        # Load the models once; they are reused for every frame.
        model_human = YOLO("best (HUM).pt")
        model_screen = YOLO("best_SCR.pt")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        # Keep fractional frame rates (e.g. 29.97) and fall back to 30 when the
        # container reports none; "not fps > 0" also rejects NaN.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process the frame
            processed_frame = process_frame(frame, model_human, model_screen)

            if out is None:
                # Reserve a temporary file name and close our own handle to it
                # before the video writer opens the same path.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_output:
                    output_path = temp_output.name
                frame_height, frame_width = processed_frame.shape[:2]
                out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
                if not out.isOpened():
                    raise IOError(f"Could not open video writer for: {output_path}")

            # Write the processed frame to the output video
            out.write(processed_frame)

        if out is None:
            raise IOError(f"No frames could be read from: {video_file_path}")
    finally:
        # Release resources even when processing fails part-way through.
        cap.release()
        if out is not None:
            out.release()

    return output_path

def process_video(video_file):
    """Gradio callback: process the given video and return the output file path.

    Args:
        video_file: Path of the input video, forwarded to ``video_to_frames``.

    Returns:
        Whatever ``video_to_frames`` returns for that path.
    """
    return video_to_frames(video_file)

# Gradio UI: one video input, one video output, both handled by process_video.
iface = gr.Interface(
    title="YOLO Video Processing",
    description="Upload a video and get processed results with YOLO models drawing polygons and calculating distances.",
    fn=process_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
)

# share=True additionally asks Gradio to create a share link for the app.
iface.launch(share=True)


Running on local URL:  http://127.0.0.1:7865

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.





0: 384x640 1 person, 1702.1ms
Speed: 7.0ms preprocess, 1702.1ms inference, 8.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 onscreen, 1656.4ms
Speed: 3.1ms preprocess, 1656.4ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1649.2ms
Speed: 3.0ms preprocess, 1649.2ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 onscreen, 1702.8ms
Speed: 26.0ms preprocess, 1702.8ms inference, 19.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1636.2ms
Speed: 2.9ms preprocess, 1636.2ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 onscreen, 1686.1ms
Speed: 15.0ms preprocess, 1686.1ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1663.6ms
Speed: 2.0ms preprocess, 1663.6ms inference, 16.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 onscreen, 1639.4ms
Speed: 4.0ms preprocess, 1639.4ms inference, 4.