In [1]:

!pip install ultralytics

import numpy as np
from ultralytics import YOLO
from skimage.feature import hog
from sklearn.metrics.pairwise import cosine_similarity

Defaulting to user installation because normal site-packages is not writeable


In [2]:
pip install ultralytics

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import cv2
import numpy as np
from ultralytics import YOLO
from skimage.feature import hog
from sklearn.metrics.pairwise import cosine_similarity

In [4]:

broadcast_cap = cv2.VideoCapture('broadcast.mp4')
tacticam_cap = cv2.VideoCapture('tacticam.mp4')

if not broadcast_cap.isOpened() or not tacticam_cap.isOpened():
    print("Error loading videos! Please check file paths.")
else:
    print(f"Broadcast video: {int(broadcast_cap.get(cv2.CAP_PROP_FRAME_COUNT))} frames")
    print(f"Tacticam video: {int(tacticam_cap.get(cv2.CAP_PROP_FRAME_COUNT))} frames")
    broadcast_cap.release()
    tacticam_cap.release()

Broadcast video: 132 frames
Tacticam video: 201 frames


In [5]:

model = YOLO('yolov8n.pt') 


In [6]:
def extract_features(image):
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    image = cv2.resize(image, (64, 128))
    hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8),
                     cells_per_block=(2, 2), channel_axis=-1)
    color_features = []
    for channel in range(3):
        hist = cv2.calcHist([image], [channel], None, [16], [0, 256])
        hist = cv2.normalize(hist, hist).flatten()
        color_features.extend(hist)
    return np.concatenate([hog_features, color_features])

In [7]:
broadcast_cap = cv2.VideoCapture('broadcast.mp4')
broadcast_players = {}
next_player_id = 1

for frame_num in range(0, 300, 30):  
    broadcast_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    ret, frame = broadcast_cap.read()
    if not ret: break
    
    results = model(frame, classes=[0])  # Assuming class 0=players
    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        player_img = frame[y1:y2, x1:x2]
        features = extract_features(player_img)
        
        if frame_num == 0:  # First frame - initialize all
            broadcast_players[next_player_id] = {
                'features': features,
                'color_hist': cv2.calcHist([player_img], [0,1,2], None, [8,8,8], [0,256,0,256,0,256])
            }
            next_player_id += 1
        else:  # Subsequent frames - match existing
            best_id = max(
                broadcast_players.keys(),
                key=lambda pid: cosine_similarity(
                    features.reshape(1,-1),
                    broadcast_players[pid]['features'].reshape(1,-1)
                )[0][0],
                default=None
            )
            if best_id and cosine_similarity(
                features.reshape(1,-1),
                broadcast_players[best_id]['features'].reshape(1,-1)
            )[0][0] > 0.7:
                broadcast_players[best_id]['features'] = 0.8*broadcast_players[best_id]['features'] + 0.2*features

broadcast_cap.release()
print(f"Broadcast reference created with {len(broadcast_players)} players")


0: 384x640 2 persons, 85.8ms
Speed: 7.2ms preprocess, 85.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 52.0ms
Speed: 2.4ms preprocess, 52.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 57.4ms
Speed: 2.6ms preprocess, 57.4ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 68.8ms
Speed: 2.6ms preprocess, 68.8ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 65.6ms
Speed: 2.9ms preprocess, 65.6ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Broadcast reference created with 2 players


In [8]:
tacticam_cap = cv2.VideoCapture('tacticam.mp4')
mapping_results = []

for frame_num in range(0, 300, 30):  # Same frame sampling as broadcast
    tacticam_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    ret, frame = tacticam_cap.read()
    if not ret: break
    
    results = model(frame, classes=[0])
    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        player_img = frame[y1:y2, x1:x2]
        features = extract_features(player_img)
        color_hist = cv2.calcHist([player_img], [0,1,2], None, [8,8,8], [0,256,0,256,0,256])
        
        # Find best broadcast match
        best_match = None
        best_score = -1
        for pid, data in broadcast_players.items():
            feature_sim = cosine_similarity(
                features.reshape(1,-1),
                data['features'].reshape(1,-1)
            )[0][0]
            hist_sim = cv2.compareHist(color_hist, data['color_hist'], cv2.HISTCMP_CORREL)
            total_score = 0.6*feature_sim + 0.4*hist_sim
            
            if total_score > best_score:
                best_score = total_score
                best_match = pid
        
        if best_score > 0.6:  # Confidence threshold
            mapping_results.append({
                'tacticam_frame': frame_num,
                'position': ((x1+x2)/2, (y1+y2)/2),
                'broadcast_id': best_match,
                'confidence': best_score
            })

tacticam_cap.release()
print(f"Matching complete. Found {len(mapping_results)} cross-camera associations")


0: 384x640 15 persons, 58.3ms
Speed: 2.3ms preprocess, 58.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 55.8ms
Speed: 2.8ms preprocess, 55.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 61.3ms
Speed: 2.9ms preprocess, 61.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 69.6ms
Speed: 2.4ms preprocess, 69.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 52.7ms
Speed: 2.4ms preprocess, 52.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 66.5ms
Speed: 2.6ms preprocess, 66.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 114.3ms
Speed: 2.8ms preprocess, 114.3ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)
Matching complete. Found 86 cross-camera associations


In [9]:
import pandas as pd
pd.set_option('display.max_rows', 10)

results_df = pd.DataFrame(mapping_results)
print("Sample matches:")
display(results_df.sample(5))

print("\nPlayer ID mapping summary:")
print(results_df['broadcast_id'].value_counts())

Sample matches:


Unnamed: 0,tacticam_frame,position,broadcast_id,confidence
4,0,"(1313.5, 851.0)",1,0.719284
65,150,"(1404.0, 703.0)",1,0.711388
53,120,"(676.5, 348.5)",1,0.704413
80,180,"(89.5, 849.0)",2,0.606932
62,120,"(1034.5, 970.5)",2,0.650677



Player ID mapping summary:
broadcast_id
1    82
2     4
Name: count, dtype: int64


In [12]:
def show_match(broadcast_id):
    match = results_df[results_df['broadcast_id']==broadcast_id].iloc[0]
    tacticam_cap = cv2.VideoCapture('tacticam.mp4')
    tacticam_cap.set(cv2.CAP_PROP_POS_FRAMES, match['tacticam_frame'])
    _, tacticam_frame = tacticam_cap.read()
    
    x, y = match['position']
    cv2.circle(tacticam_frame, (int(x), int(y)), 10, (0,0,255), -1)
    cv2.putText(tacticam_frame, f"ID:{broadcast_id}", (int(x)+15, int(y)), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
    
    tacticam_frame = cv2.resize(tacticam_frame, (0,0), fx=0.5, fy=0.5)
    cv2.imshow(f"Tacticam View - Player {broadcast_id}", tacticam_frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    tacticam_cap.release()

# Show first 3 players
for pid in results_df['broadcast_id'].unique()[:3]:
    show_match(pid)