In [5]:
import torch
from ultralytics import YOLO
import os


device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

In [7]:
final_model = YOLO("runs_lr/detect/train7/weights/best.pt")

In [9]:
image_folder = "visualization_trial"
image_paths = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.jpeg')]

In [23]:
image_paths

['visualization_trial/IMG_4646.jpeg',
 'visualization_trial/IMG_4647.jpeg',
 'visualization_trial/IMG_4642.jpeg',
 'visualization_trial/IMG_4648.jpeg',
 'visualization_trial/IMG_4644.jpeg',
 'visualization_trial/IMG_4645.jpeg',
 'visualization_trial/IMG_4649.jpeg']

In [12]:
trial = final_model.predict(image_paths)


0: 640x640 (no detections), 38.8ms
1: 640x640 1 TC, 38.8ms
2: 640x640 (no detections), 38.8ms
3: 640x640 2 9Cs, 5 TCs, 1 hand, 1 flop, 38.8ms
4: 640x640 1 2D, 1 4D, 3 7Hs, 2 8Cs, 2 JSs, 1 KC, 1 QC, 1 TH, 1 hand, 1 flop, 38.8ms
5: 640x640 1 3C, 1 4C, 1 4H, 1 5H, 1 7H, 2 TCs, 1 hand, 1 flop, 38.8ms
6: 640x640 1 TC, 38.8ms
Speed: 3.4ms preprocess, 38.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)


In [22]:
trial[6].show()

In [36]:
import cv2

# Path to the input video
input_video_path = "visualization_trial/IMG_4650.mp4"
output_video_path = "demo.mp4"

# Load the YOLO model and predict on the video stream
video = final_model.predict(input_video_path, stream=True)

# Initialize the VideoCapture to get video properties
cap = cv2.VideoCapture(input_video_path)
fps = cap.get(cv2.CAP_PROP_FPS)  # Frames per second
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Video frame width
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Video frame height
cap.release()

# Initialize OpenCV VideoWriter
out = cv2.VideoWriter(
    output_video_path,
    cv2.VideoWriter_fourcc(*'mp4v'),  # Codec for .mp4 files
    fps,
    (frame_width, frame_height)
)

# Process and write each frame to the output video
for result in video:
    # Plot the frame with YOLO predictions
    frame = result.plot()  # Adds bounding boxes to the frame

    # Write the processed frame to the output video
    out.write(frame)

# Release the VideoWriter
out.release()
print(f"Processed video saved to: {output_video_path}")


video 1/1 (frame 1/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 43.2ms
video 1/1 (frame 2/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 37.8ms
video 1/1 (frame 3/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 32.4ms
video 1/1 (frame 4/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 34.2ms
video 1/1 (fram

In [51]:
import cv2

# Path to the input video
input_video_path = "visualization_trial/IMG_4650.mp4"
output_video_path = "demo4.mp4"

# Load the YOLO model and predict on the video stream
video = final_model.predict(input_video_path, stream=True)

# Initialize the VideoCapture to get video properties
cap = cv2.VideoCapture(input_video_path)
fps = cap.get(cv2.CAP_PROP_FPS)  # Frames per second
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Video frame width
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Video frame height
cap.release()

# Initialize OpenCV VideoWriter
out = cv2.VideoWriter(
    output_video_path,
    cv2.VideoWriter_fourcc(*'mp4v'),  # Codec for .mp4 files
    fps,
    (frame_width, frame_height)
)

# Helper function to check if a bounding box is inside another
def is_inside(box1, box2):
    """
    Check if box1 is inside box2
    box = [x_min, y_min, x_max, y_max]
    """
    return box1[0] >= box2[0] and box1[1] >= box2[1] and box1[2] <= box2[2] and box1[3] <= box2[3]

# Process and write each frame to the output video
for result in video:
    frame = result.orig_img.copy()  # Original frame without bounding boxes
    
    hand_cards = []  # Cards detected in the hand
    flop_cards = []  # Cards detected in the flop
    
    # Find bounding boxes and class names
    boxes = result.boxes.xyxy.cpu().numpy()  # Bounding box coordinates
    classes = result.boxes.cls.cpu().numpy()  # Class indices
    labels = result.names  # Class label names

    hand_box = None
    flop_box = None
    
    # Identify "HAND" and "FLOP" bounding boxes
    for i, cls_idx in enumerate(classes):
        cls_name = labels[int(cls_idx)]
        if cls_name == "hand":
            hand_box = boxes[i]
        elif cls_name == "flop":
            flop_box = boxes[i]
    
    # Identify cards inside HAND and FLOP
    for i, cls_idx in enumerate(classes):
        cls_name = labels[int(cls_idx)]
        if cls_name != "hand" and cls_name != "flop":
            card_box = boxes[i]

            try:
                if hand_box is not None and is_inside(card_box, hand_box):
                    print(hand_box)
                    hand_cards.append(cls_name)
                elif flop_box is not None and is_inside(card_box, flop_box):
                    print(flop_box)
                    flop_cards.append(cls_name)
            except:
                raise Exception(f"Error processing frame")
            finally:
                print("Hand", hand_box)
                print("Flop", flop_box)
                print("Card", card_box)

    # Add bounding boxes to the frame
    frame_with_boxes = result.plot()

    # Prepare text for HAND and FLOP
    hand_text = "HAND: " + ", ".join(hand_cards) if hand_cards else "HAND: None"
    flop_text = "FLOP: " + ", ".join(flop_cards) if flop_cards else "FLOP: None"

    # Overlay text on the frame
    cv2.putText(frame_with_boxes, hand_text, (50, frame_height - 100),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.putText(frame_with_boxes, flop_text, (50, frame_height - 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    # Write the processed frame to the output video
    out.write(frame_with_boxes)

# Release the VideoWriter
out.release()
print(f"Processed video saved to: {output_video_path}")


video 1/1 (frame 1/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 32.7ms
video 1/1 (frame 2/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 32.3ms
video 1/1 (frame 3/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 26.3ms
video 1/1 (frame 4/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 25.5ms
video 1/1 (fram

In [54]:
import cv2
import numpy as np

# Path to the input video
input_video_path = "visualization_trial/IMG_4650.mp4"
output_video_path = "demo4.mp4"

# Load the YOLO model and predict on the video stream
video = final_model.predict(input_video_path, stream=True)

# Initialize the VideoCapture to get video properties
cap = cv2.VideoCapture(input_video_path)
fps = cap.get(cv2.CAP_PROP_FPS)  # Frames per second
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Video frame width
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Video frame height
cap.release()

# Initialize OpenCV VideoWriter
out = cv2.VideoWriter(
    output_video_path,
    cv2.VideoWriter_fourcc(*'mp4v'),  # Codec for .mp4 files
    fps,
    (frame_width, frame_height)
)

# Helper function to check if a bounding box is inside another
def is_inside(box1, box2):
    """
    Check if box1 is inside box2
    box = [x_min, y_min, x_max, y_max]
    """
    return box1[0] >= box2[0] and box1[1] >= box2[1] and box1[2] <= box2[2] and box1[3] <= box2[3]


def weighted_rolling_average(data):
    """
    Calculate a weighted moving average for a list of floats.
    Weights increase toward the end of the list.
    """
    n = len(data)
    if n == 0:
        return 0  # Handle empty lists
    
    # Generate weights that increase linearly: [1, 2, ..., n]
    weights = np.arange(1, n + 1)
    
    # Compute weighted average: sum(data[i] * weight[i]) / sum(weights)
    weighted_avg = np.dot(data, weights) / weights.sum()
    
    # Adjust for streak length by multiplying by log(n + 1) to favor longer streaks
    adjusted_avg = weighted_avg * np.log1p(n)  # log1p(x) is log(1 + x) to avoid issues with small n
    return adjusted_avg

# Memory for HAND and FLOP
memory_hand = dict()
memory_flop = dict()

# Process and write each frame to the output video
for result in video:
    frame = result.orig_img.copy()  # Original frame without bounding boxes
    
    current_hand_cards = []  # Cards detected in the hand
    current_flop_cards = []  # Cards detected in the flop
    
    # Find bounding boxes, class names, and confidence scores
    boxes = result.boxes.xyxy.cpu().numpy()  # Bounding box coordinates
    classes = result.boxes.cls.cpu().numpy()  # Class indices
    confidences = result.boxes.conf.cpu().numpy()  # Confidence scores
    labels = result.names  # Class label names
    
    # Identify "HAND" and "FLOP" bounding boxes
    for i, cls_idx in enumerate(classes):
        cls_name = labels[int(cls_idx)]
        if cls_name == "hand":
            hand_box = boxes[i]
        elif cls_name == "flop":
            flop_box = boxes[i]
    
    # Identify cards inside HAND and FLOP
    for i, cls_idx in enumerate(classes):
        cls_name = labels[int(cls_idx)]
        if cls_name != "hand" and cls_name != "flop":
            card_box = boxes[i]
            confidence = confidences[i]
            if hand_box is not None and is_inside(card_box, hand_box):
                if cls_name not in memory_hand:
                    memory_hand[cls_name] = []
                memory_hand[cls_name].append(confidence)
            elif flop_box is not None and is_inside(card_box, flop_box):
                if cls_name not in memory_flop:
                    memory_flop[cls_name] = []
                memory_flop[cls_name].append(confidence)


    avg_confidence_hand = {card: weighted_rolling_average(confidences) for card, confidences in memory_hand.items()}
    avg_confidence_flop = {card: weighted_rolling_average(confidences) for card, confidences in memory_flop.items()}

    current_hand = sorted(avg_confidence_hand, key=avg_confidence_hand.get, reverse=True)[:2]
    current_flop = sorted(avg_confidence_flop, key=avg_confidence_flop.get, reverse=True)[:5]

    # Add bounding boxes to the frame
    frame_with_boxes = result.plot()

    # Prepare text for HAND and FLOP
    hand_text = "HAND: " + ", ".join(current_hand) if current_hand else "HAND: None"
    flop_text = "FLOP: " + ", ".join(current_flop) if current_flop else "FLOP: None"

    # Overlay text on the frame
    cv2.putText(frame_with_boxes, hand_text, (50, frame_height - 100),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    cv2.putText(frame_with_boxes, flop_text, (50, frame_height - 50),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

    # Write the processed frame to the output video
    out.write(frame_with_boxes)

# Release the VideoWriter
out.release()
print(f"Processed video saved to: {output_video_path}")


video 1/1 (frame 1/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 37.5ms
video 1/1 (frame 2/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 33.6ms
video 1/1 (frame 3/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 31.4ms
video 1/1 (frame 4/1819) /Users/alessandropranzo/Library/Mobile Documents/com~apple~CloudDocs/Desktop/University/Master/1st Year/1st Sem/Image Analysis/Project/poker-visual-engine/visualization_trial/IMG_4650.mp4: 384x640 (no detections), 36.3ms
video 1/1 (fram