In [2]:
import json
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Input locations -- update as needed. Each default is the original local
# path; set the matching environment variable to point the notebook at another
# copy of the data without editing this cell.
bounding_boxes_json_path = os.environ.get(
    "PLAYER_BBOXES_JSON",
    "/Users/thomasmcconnell/Library/CloudStorage/OneDrive-PomonaCollege/School/CS153/Project/cs153-football-formation-id/yolo_player_detector/player_bboxes.json",
)
dataset_json_path = os.environ.get(
    "FORMATION_DATASET_JSON",
    "/Users/thomasmcconnell/Library/CloudStorage/OneDrive-PomonaCollege/School/CS153/Project/cs153-football-formation-id/sample_data/sample_dataset.json",
)

# Player detections: the whole file is a single JSON document.
with open(bounding_boxes_json_path, "r", encoding="utf-8") as f:
    all_detections = json.load(f)

# Dataset metadata: one JSON object per line. Blank lines (e.g. an extra empty
# line at the end of the file) are skipped, because json.loads raises
# JSONDecodeError on an empty string.
with open(dataset_json_path, "r", encoding="utf-8") as f:
    dataset_metadata = [json.loads(line) for line in f if line.strip()]

Now that we have bounding boxes for player detection, we will create a model that uses these bounding boxes as input to predict formations.

In [4]:
# Number of player slots in every feature vector (two features per slot).
# Defined once here instead of being reassigned on every loop iteration.
max_players = 20


def build_feature_vector(players, num_slots=max_players):
    """Turn one frame's player detections into a fixed-length feature vector.

    Args:
        players: iterable of dicts, each with a "bbox" entry that unpacks to
            (x1, y1, x2, y2).
        num_slots: number of player slots to keep in the vector.

    Returns:
        1-D float array of length ``num_slots * 2`` laid out as
        [cx0, cy0, cx1, cy1, ...], where (cx, cy) is the center of each
        bounding box. Unused slots are zero; detections beyond ``num_slots``
        are dropped.
    """
    # NOTE(review): slots are filled in the order the detections appear in the
    # JSON. If that order is not consistent from frame to frame, consider
    # sorting the centers (e.g. by cx) before flattening -- confirm.
    centers = [
        ((x1 + x2) / 2, (y1 + y2) / 2)
        for x1, y1, x2, y2 in (player["bbox"] for player in players)
    ]

    features = np.zeros(num_slots * 2, dtype=float)
    flat = np.asarray(centers[:num_slots], dtype=float).reshape(-1)
    features[:flat.size] = flat
    return features


# Look up table: video path -> labelled offensive formation.
video_to_formation = {
    entry["video_path"]: entry["off_formation"] for entry in dataset_metadata
}

X = []  # Feature vectors
y = []  # Formation labels

for detection in all_detections:
    # Strip the crop prefix and image extension so the remainder can be used
    # as a key into video_to_formation.
    filename = detection["image_filename"]
    video_path = filename.replace("cropped_sideline_", "").replace(".png", "")

    formation_label = video_to_formation.get(video_path)
    if formation_label is None:
        # No label for this frame: it cannot be used for supervised training.
        continue

    X.append(build_feature_vector(detection["players"]))
    y.append(formation_label)

# Fail with an actionable message instead of the less direct error that
# train_test_split raises when handed an empty array.
if not X:
    raise ValueError(
        "No detections matched a labelled video; check that the image "
        "filenames map onto the 'video_path' entries of the dataset metadata."
    )

X = np.array(X)
y = np.array(y)

# train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# train random forest
clf = RandomForestClassifier(n_estimators=50, random_state=42)
clf.fit(X_train, y_train)

# evaluate
y_pred = clf.predict(X_test)

# zero_division=0 reports 0.0 for classes that were never predicted (the same
# value the default produces) without emitting an UndefinedMetricWarning for
# each affected metric.
print(classification_report(y_test, y_pred, zero_division=0))


              precision    recall  f1-score   support

        Aces       1.00      1.00      1.00         1
  KINGSSPLIT       0.50      1.00      0.67         1
      QUEENS       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
