In [None]:
# TEST WITH IMAGE 

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance

def draw_road_polygon_on_image(img_path, output_path, display_size=(800, 600),
                               distance_threshold=400, on_label="on"):
    """Annotate an image with human/screen masks and the distance between them.

    The human and screen segmentation models are run on ``img_path``. Screen
    detections whose class name equals ``on_label`` are treated as "on" screens;
    all other screen detections are filled blue. For every human polygon the
    nearest "on" screen polygon is found, a line and the pixel distance between
    the two closest vertices are drawn, and both polygons are filled with a
    colour chosen by comparing the distance with ``distance_threshold``.

    Args:
        img_path: Path of the image to read and run the models on.
        output_path: Path the annotated, resized image is written to.
        display_size: ``(width, height)`` the annotated image is resized to.
        distance_threshold: Pixel distance below which a human/screen pair is
            drawn in the "within range" colours.
        on_label: Class name of the screen model that marks an "on" screen
            (compared case-insensitively). NOTE(review): assumes the screen
            model was trained with a class literally named "on" - confirm.

    Raises:
        FileNotFoundError: If ``img_path`` cannot be read as an image.
    """
    def _as_polygon(mask_points):
        # cv2 drawing functions take int32 vertices shaped (N, 1, 2).
        return np.array(mask_points, np.int32).reshape((-1, 1, 2))

    def _closest_pair(human_polygon, candidates):
        # Return (distance, human_vertex, screen_vertex, screen_polygon) for the
        # nearest candidate polygon, or None when there are no candidates.
        best = None
        human_vertices = human_polygon.reshape(-1, 2)
        for candidate in candidates:
            vertices = candidate.reshape(-1, 2)
            pairwise = distance.cdist(human_vertices, vertices)
            row, col = np.unravel_index(np.argmin(pairwise), pairwise.shape)
            if best is None or pairwise[row, col] < best[0]:
                best = (float(pairwise[row, col]), human_vertices[row], vertices[col], candidate)
        return best

    # Read the image; cv2.imread returns None instead of raising on failure.
    image = cv2.imread(img_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Load the models
    model_human = YOLO("best (human).pt")
    model_screen = YOLO("best (screen).pt")

    # Perform prediction using the models
    human_result = model_human.predict(img_path)[0]
    screen_result = model_screen.predict(img_path)[0]

    # Split screen detections into "on" and "off" polygons. ``names`` maps class
    # ids to names, so the class has to be looked up per detection via
    # ``boxes.cls``. ``masks`` is None when the model detected nothing.
    screen_polygons = []
    off_screen_polygons = []
    if screen_result.masks is not None:
        wanted = str(on_label).strip().lower()
        class_ids = screen_result.boxes.cls.tolist()
        for mask_points, class_id in zip(screen_result.masks.xy, class_ids):
            if len(mask_points) == 0:
                continue
            class_name = str(screen_result.names[int(class_id)]).strip().lower()
            if class_name == wanted:
                screen_polygons.append(_as_polygon(mask_points))
            else:
                off_screen_polygons.append(_as_polygon(mask_points))

    # Draw blue mask for off screen
    for off_screen_polygon in off_screen_polygons:
        cv2.fillPoly(image, [off_screen_polygon], color=(255, 0, 0))

    # Collect the human polygons
    human_polygons = []
    if human_result.masks is not None:
        human_polygons = [_as_polygon(points) for points in human_result.masks.xy if len(points) > 0]

    # Pair every human with its nearest "on" screen and draw the result
    for human_polygon in human_polygons:
        nearest = _closest_pair(human_polygon, screen_polygons)
        if nearest is None:
            continue
        min_dist, human_vertex, screen_vertex, closest_screen_polygon = nearest

        # Plain Python ints keep cv2 happy regardless of the numpy scalar type.
        human_point = (int(human_vertex[0]), int(human_vertex[1]))
        screen_point = (int(screen_vertex[0]), int(screen_vertex[1]))
        cv2.line(image, human_point, screen_point, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the line
        midpoint = ((human_point[0] + screen_point[0]) // 2,
                    (human_point[1] + screen_point[1]) // 2)
        cv2.putText(image, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        within_range = min_dist < distance_threshold

        # Human mask: red when within range, yellow otherwise
        human_mask_color = (0, 0, 255) if within_range else (0, 255, 255)
        cv2.fillPoly(image, [human_polygon], color=human_mask_color)

        # Screen mask: green when within range, violet otherwise
        screen_color = (0, 255, 0) if within_range else (238, 130, 238)
        cv2.fillPoly(image, [closest_screen_polygon], color=screen_color)

    # Resize the image for display
    resized_image = cv2.resize(image, display_size, interpolation=cv2.INTER_AREA)

    # Save the image
    cv2.imwrite(output_path, resized_image)

# Example usage: annotate '3.jpg' and write the resized result to '3_5.jpg'.
draw_road_polygon_on_image('3.jpg', '3_5.jpg')


In [None]:
# TEST WITH VIDEO 

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance

def process_frame(frame, model_human, model_screen, display_size=(800, 600),
                  distance_threshold=400, on_label="on"):
    """Annotate one video frame with human/screen masks and their distance.

    Both segmentation models are run on ``frame``. Screen detections whose class
    name equals ``on_label`` are treated as "on" screens; all other screen
    detections are filled blue. For every human polygon the nearest "on" screen
    polygon is found, a line and the pixel distance between the two closest
    vertices are drawn, and both polygons are filled with a colour chosen by
    comparing the distance with ``distance_threshold``.

    Args:
        frame: Image array to analyse; it is drawn on in place.
        model_human: Model whose ``predict(frame)`` returns a list of results
            with a ``masks`` attribute.
        model_screen: Model whose ``predict(frame)`` returns a list of results
            with ``masks``, ``boxes.cls`` and ``names`` attributes.
        display_size: ``(width, height)`` the annotated frame is resized to.
        distance_threshold: Pixel distance below which a human/screen pair is
            drawn in the "within range" colours.
        on_label: Class name of the screen model that marks an "on" screen
            (compared case-insensitively). NOTE(review): assumes the screen
            model was trained with a class literally named "on" - confirm.

    Returns:
        The annotated frame resized to ``display_size``.
    """
    def _as_polygon(mask_points):
        # cv2 drawing functions take int32 vertices shaped (N, 1, 2).
        return np.array(mask_points, np.int32).reshape((-1, 1, 2))

    def _closest_pair(human_polygon, candidates):
        # Return (distance, human_vertex, screen_vertex, screen_polygon) for the
        # nearest candidate polygon, or None when there are no candidates.
        best = None
        human_vertices = human_polygon.reshape(-1, 2)
        for candidate in candidates:
            vertices = candidate.reshape(-1, 2)
            pairwise = distance.cdist(human_vertices, vertices)
            row, col = np.unravel_index(np.argmin(pairwise), pairwise.shape)
            if best is None or pairwise[row, col] < best[0]:
                best = (float(pairwise[row, col]), human_vertices[row], vertices[col], candidate)
        return best

    # Perform prediction using the models
    human_result = model_human.predict(frame)[0]
    screen_result = model_screen.predict(frame)[0]

    # Split screen detections into "on" and "off" polygons. ``names`` maps class
    # ids to names, so the class has to be looked up per detection via
    # ``boxes.cls``. ``masks`` is None when the model detected nothing.
    screen_polygons = []
    off_screen_polygons = []
    if screen_result.masks is not None:
        wanted = str(on_label).strip().lower()
        class_ids = screen_result.boxes.cls.tolist()
        for mask_points, class_id in zip(screen_result.masks.xy, class_ids):
            if len(mask_points) == 0:
                continue
            class_name = str(screen_result.names[int(class_id)]).strip().lower()
            if class_name == wanted:
                screen_polygons.append(_as_polygon(mask_points))
            else:
                off_screen_polygons.append(_as_polygon(mask_points))

    # Draw blue mask for off screen
    for off_screen_polygon in off_screen_polygons:
        cv2.fillPoly(frame, [off_screen_polygon], color=(255, 0, 0))

    # Collect the human polygons
    human_polygons = []
    if human_result.masks is not None:
        human_polygons = [_as_polygon(points) for points in human_result.masks.xy if len(points) > 0]

    # Pair every human with its nearest "on" screen and draw the result
    for human_polygon in human_polygons:
        nearest = _closest_pair(human_polygon, screen_polygons)
        if nearest is None:
            continue
        min_dist, human_vertex, screen_vertex, closest_screen_polygon = nearest

        # Plain Python ints keep cv2 happy regardless of the numpy scalar type.
        human_point = (int(human_vertex[0]), int(human_vertex[1]))
        screen_point = (int(screen_vertex[0]), int(screen_vertex[1]))
        cv2.line(frame, human_point, screen_point, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the line
        midpoint = ((human_point[0] + screen_point[0]) // 2,
                    (human_point[1] + screen_point[1]) // 2)
        cv2.putText(frame, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        within_range = min_dist < distance_threshold

        # Human mask: red when within range, yellow otherwise
        human_mask_color = (0, 0, 255) if within_range else (0, 255, 255)
        cv2.fillPoly(frame, [human_polygon], color=human_mask_color)

        # Screen mask: green when within range, violet otherwise
        screen_color = (0, 255, 0) if within_range else (238, 130, 238)
        cv2.fillPoly(frame, [closest_screen_polygon], color=screen_color)

    # Resize the image for display
    resized_frame = cv2.resize(frame, display_size, interpolation=cv2.INTER_AREA)

    return resized_frame

def process_video(input_path, output_path):
    """Run ``process_frame`` on every frame of a video and save the result.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the XVID-encoded video to write.

    Raises:
        IOError: If ``input_path`` cannot be opened.
    """
    # Open video capture
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {input_path}")

        fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec for .avi files

        # Keep the fractional frame rate (e.g. 29.97); fall back to 30 when the
        # container does not report a usable value.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Load the models
        model_human = YOLO("best (human).pt")
        model_screen = YOLO("best (screen).pt")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process the frame
            processed_frame = process_frame(frame, model_human, model_screen)

            # The writer must be opened with the size of the frames it will
            # receive. process_frame returns a resized frame, so the size is
            # taken from the first processed frame rather than from the source.
            if out is None:
                height, width = processed_frame.shape[:2]
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            # Write the frame to the output video
            out.write(processed_frame)
    finally:
        # Release resources even when reading or processing fails
        cap.release()
        if out is not None:
            out.release()

# Example usage: annotate 'input_video.mp4' and write the result to 'output_video.avi'.
process_video('input_video.mp4', 'output_video.avi')


In [None]:
# GRADIO INTEGRATION

In [None]:
import gradio as gr
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance
from PIL import Image
from io import BytesIO

def process_frame(frame, model_human, model_screen):
    """Annotate one frame with the distance from each human to the nearest screen.

    Both segmentation models are run on ``frame``. For every human polygon the
    nearest screen polygon is found, a line and the pixel distance between the
    two closest vertices are drawn, the screen is outlined in violet when the
    distance exceeds 444, and the screen is filled green when the distance is
    below 400 and violet otherwise.

    Args:
        frame: Image array to analyse; it is drawn on in place.
        model_human: Model whose ``predict(frame)`` returns a list of results
            with a ``masks`` attribute.
        model_screen: Same contract as ``model_human``, for screens.

    Returns:
        The annotated frame resized to 800x600.
    """
    display_size = (800, 600)
    outline_threshold = 444  # beyond this distance the screen is also outlined
    fill_threshold = 400     # below this distance the screen is filled green

    def _polygons(result):
        # ``masks`` is None when the model detected nothing in the frame.
        if result.masks is None:
            return []
        # cv2 drawing functions take int32 vertices shaped (N, 1, 2).
        return [np.array(points, np.int32).reshape((-1, 1, 2))
                for points in result.masks.xy if len(points) > 0]

    def _closest_pair(human_polygon, candidates):
        # Return (distance, human_vertex, screen_vertex, screen_polygon) for the
        # nearest candidate polygon, or None when there are no candidates.
        best = None
        human_vertices = human_polygon.reshape(-1, 2)
        for candidate in candidates:
            vertices = candidate.reshape(-1, 2)
            pairwise = distance.cdist(human_vertices, vertices)
            row, col = np.unravel_index(np.argmin(pairwise), pairwise.shape)
            if best is None or pairwise[row, col] < best[0]:
                best = (float(pairwise[row, col]), human_vertices[row], vertices[col], candidate)
        return best

    # Perform prediction using the models
    human_result = model_human.predict(frame)[0]
    screen_result = model_screen.predict(frame)[0]

    screen_polygons = _polygons(screen_result)
    human_polygons = _polygons(human_result)

    # Pair every human with its nearest screen and draw the result
    for human_polygon in human_polygons:
        nearest = _closest_pair(human_polygon, screen_polygons)
        if nearest is None:
            continue
        min_dist, human_vertex, screen_vertex, closest_screen_polygon = nearest

        # Plain Python ints keep cv2 happy regardless of the numpy scalar type.
        human_point = (int(human_vertex[0]), int(human_vertex[1]))
        screen_point = (int(screen_vertex[0]), int(screen_vertex[1]))
        cv2.line(frame, human_point, screen_point, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the line
        midpoint = ((human_point[0] + screen_point[0]) // 2,
                    (human_point[1] + screen_point[1]) // 2)
        cv2.putText(frame, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Draw a violet polygon around the screen when it is far away
        if min_dist > outline_threshold:
            cv2.polylines(frame, [closest_screen_polygon], isClosed=True, color=(238, 130, 238), thickness=5)

        # Mark screen polygons
        screen_color = (0, 255, 0) if min_dist < fill_threshold else (238, 130, 238)
        cv2.fillPoly(frame, [closest_screen_polygon], color=screen_color)

    # Resize the image for display
    resized_frame = cv2.resize(frame, display_size, interpolation=cv2.INTER_AREA)

    return resized_frame

def video_to_frames(video_file):
    """Annotate every frame of an uploaded video and save it to a temporary file.

    Args:
        video_file: Either a path string or an object whose ``name`` attribute
            is the path of the uploaded video.

    Returns:
        Path of the annotated XVID ``.avi`` file. The file is created with
        ``delete=False``, so the caller decides when to remove it.

    Raises:
        IOError: If the video cannot be opened.
    """
    import tempfile  # local import: only this function needs it

    # Accept both a plain path and a file-like wrapper exposing ``.name``.
    video_path = getattr(video_file, "name", video_file)

    # Read the video file
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # Load the models
        model_human = YOLO("best (human).pt")
        model_screen = YOLO("best (screen).pt")

        # Prepare the video writer settings
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        # Keep the fractional frame rate; fall back to 30 when none is reported.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # cv2.VideoWriter writes to a filename, not to an in-memory buffer, so
        # the output goes to a named temporary file.
        with tempfile.NamedTemporaryFile(suffix=".avi", delete=False) as handle:
            output_path = handle.name

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process the frame
            processed_frame = process_frame(frame, model_human, model_screen)

            # The writer must be opened with the size of the frames it will
            # receive. process_frame returns a resized frame, so the size is
            # taken from the first processed frame rather than from the source.
            if out is None:
                height, width = processed_frame.shape[:2]
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            # Write the processed frame to the output video
            out.write(processed_frame)
    finally:
        # Release resources even when reading or processing fails
        cap.release()
        if out is not None:
            out.release()

    return output_path

def process_video(video_file):
    """Gradio callback: hand the uploaded video to ``video_to_frames``.

    Args:
        video_file: The value Gradio passes for the video input.

    Returns:
        Whatever ``video_to_frames`` returns for ``video_file``.
    """
    return video_to_frames(video_file)

# Build the web UI: the uploaded video is passed to process_video and the value
# it returns is shown in the output player. gr.Video does not define a `type`
# argument, so both components are created with their defaults.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(),
    outputs=gr.Video(),
    title="YOLO Video Processing",
    description="Upload a video and get processed results with YOLO models drawing polygons and calculating distances."
)

# share=True additionally requests a temporary public link from Gradio.
iface.launch(share=True)


In [None]:
# TRY TO TAKE LIVE VIDEO INPUT FROM WEB CAM

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from scipy.spatial import distance

# Load the YOLO models once at module level; process_frame below reads
# model_human and model_screen as globals instead of taking them as arguments.
model_human = YOLO("best (human).pt")
model_screen = YOLO("best (screen).pt")

def process_frame(frame):
    """Annotate one webcam frame with the distance from each human to a screen.

    The module-level ``model_human`` and ``model_screen`` are run on ``frame``.
    For every human polygon the nearest screen polygon is found, a line and the
    pixel distance between the two closest vertices are drawn, the screen is
    outlined in violet when the distance exceeds 444, and the screen is filled
    green when the distance is below 400 and violet otherwise.

    Args:
        frame: Image array to analyse; it is drawn on in place.

    Returns:
        The same ``frame`` object, annotated.
    """
    outline_threshold = 444  # beyond this distance the screen is also outlined
    fill_threshold = 400     # below this distance the screen is filled green

    def _polygons(result):
        # ``masks`` is None when the model detected nothing, which is common
        # for live frames with nobody (or no screen) in view.
        if result.masks is None:
            return []
        # cv2 drawing functions take int32 vertices shaped (N, 1, 2).
        return [np.array(points, np.int32).reshape((-1, 1, 2))
                for points in result.masks.xy if len(points) > 0]

    def _closest_pair(human_polygon, candidates):
        # Return (distance, human_vertex, screen_vertex, screen_polygon) for the
        # nearest candidate polygon, or None when there are no candidates.
        best = None
        human_vertices = human_polygon.reshape(-1, 2)
        for candidate in candidates:
            vertices = candidate.reshape(-1, 2)
            pairwise = distance.cdist(human_vertices, vertices)
            row, col = np.unravel_index(np.argmin(pairwise), pairwise.shape)
            if best is None or pairwise[row, col] < best[0]:
                best = (float(pairwise[row, col]), human_vertices[row], vertices[col], candidate)
        return best

    # Perform prediction using the models
    human_result = model_human.predict(frame)[0]
    screen_result = model_screen.predict(frame)[0]

    screen_polygons = _polygons(screen_result)
    human_polygons = _polygons(human_result)

    # Pair every human with its nearest screen and draw the result
    for human_polygon in human_polygons:
        nearest = _closest_pair(human_polygon, screen_polygons)
        if nearest is None:
            continue
        min_dist, human_vertex, screen_vertex, closest_screen_polygon = nearest

        # Plain Python ints keep cv2 happy regardless of the numpy scalar type.
        human_point = (int(human_vertex[0]), int(human_vertex[1]))
        screen_point = (int(screen_vertex[0]), int(screen_vertex[1]))
        cv2.line(frame, human_point, screen_point, color=(255, 0, 0), thickness=5)

        # Display the Euclidean distance at the middle of the line
        midpoint = ((human_point[0] + screen_point[0]) // 2,
                    (human_point[1] + screen_point[1]) // 2)
        cv2.putText(frame, f"{min_dist:.2f}", midpoint, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Draw a violet polygon around the screen when it is far away
        if min_dist > outline_threshold:
            cv2.polylines(frame, [closest_screen_polygon], isClosed=True, color=(238, 130, 238), thickness=5)

        # Mark screen polygons
        screen_color = (0, 255, 0) if min_dist < fill_threshold else (238, 130, 238)
        cv2.fillPoly(frame, [closest_screen_polygon], color=screen_color)

    return frame

def main():
    """Show annotated webcam frames in a window until 'q' is pressed.

    Frames are read from the default webcam (device 0), passed through
    ``process_frame`` and displayed in a window titled 'Masked Video'. The loop
    also ends when a frame cannot be read.
    """
    # Open a connection to the webcam
    cap = cv2.VideoCapture(0)  # 0 is the default webcam

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                break

            # Process the frame
            processed_frame = process_frame(frame)

            # Display the resulting frame
            cv2.imshow('Masked Video', processed_frame)

            # Exit loop when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the webcam and close all OpenCV windows, even when
        # processing a frame raises.
        cap.release()
        cv2.destroyAllWindows()

# Start the webcam loop only when this code runs as the main module.
if __name__ == "__main__":
    main()
