In [23]:
import pandas as pd
import numpy as np

In [37]:
# Fixed seed so that Restart Kernel -> Run All regenerates the same synthetic data.
SEED = 42
rng = np.random.default_rng(SEED)
# Some generator functions in this notebook draw from the legacy global
# np.random state rather than from `rng`, so that state is seeded as well.
np.random.seed(SEED)

# Matches Generation

In [24]:
def generate_matches(opponents_array, num_games=10, start_date="2025-01-15", freq="W", generator=None):
    """Build a fixture list with one randomly drawn opponent per match date.

    Parameters
    ----------
    opponents_array : sequence
        Candidate opponents. One is sampled (with replacement) for each match.
    num_games : int, default 10
        Number of matches, i.e. number of rows in the result.
    start_date : str or datetime-like, default "2025-01-15"
        Start passed to ``pd.date_range``.
    freq : str, default "W"
        Frequency passed to ``pd.date_range``.
    generator : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator for reproducible
        fixtures. When omitted, the legacy global ``np.random`` state is
        used, which is what this function did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns ``"date"`` and ``"opponent"``, one row per match.

    Raises
    ------
    ValueError
        If ``opponents_array`` is empty while ``num_games`` is positive.
    """
    if num_games > 0 and len(opponents_array) == 0:
        raise ValueError("opponents_array must contain at least one opponent")

    # Both np.random (legacy module) and Generator expose a compatible .choice().
    source = np.random if generator is None else generator

    dates = pd.date_range(start_date, periods=num_games, freq=freq)
    selected_opponents = source.choice(opponents_array, size=num_games)

    return pd.DataFrame({
        "date": dates,
        "opponent": selected_opponents,
    })

In [25]:
# Pool of opponents that each fixture is drawn from.
opponents = [
    "Fábrica de Azulejos Coentrão",
    "Comunidade Judaica de Vila Real",
    "Sporting Clube de Cascos de Rolha",
    "Associação Desportiva de Cascos de Rolha",
]

matches_df = generate_matches(opponents_array=opponents, num_games=10, freq="W")

# Sequential surrogate key, placed as the first column.
matches_df.insert(
    loc=0,
    column="match_id",
    value=np.arange(len(matches_df), dtype=int),
)
matches_df

Unnamed: 0,match_id,date,opponent
0,0,2025-01-19,Fábrica de Azulejos Coentrão
1,1,2025-01-26,Associação Desportiva de Cascos de Rolha
2,2,2025-02-02,Fábrica de Azulejos Coentrão
3,3,2025-02-09,Associação Desportiva de Cascos de Rolha
4,4,2025-02-16,Fábrica de Azulejos Coentrão
5,5,2025-02-23,Fábrica de Azulejos Coentrão
6,6,2025-03-02,Fábrica de Azulejos Coentrão
7,7,2025-03-09,Sporting Clube de Cascos de Rolha
8,8,2025-03-16,Comunidade Judaica de Vila Real
9,9,2025-03-23,Fábrica de Azulejos Coentrão


# Appearances Generation

In [None]:
def generate_appearances(matches_df, starter_id=0, starter_weight=0.7, substitute_chance=0.15, available_players=(0, 1, 2, 3), generator=None):
    """Assign one or two distinct players to every match.

    For each ``match_id`` the function first decides how many appearances the
    match has: two with probability ``substitute_chance``, otherwise one.
    Each appearance then picks a player that has not yet appeared in that
    match: with probability ``starter_weight`` the starter is chosen (if still
    unused); otherwise a player is drawn uniformly from the unused
    non-starters, falling back to any unused player when there are none.

    Parameters
    ----------
    matches_df : pandas.DataFrame
        Must contain a ``"match_id"`` column.
    starter_id : hashable, default 0
        Identifier of the preferred player.
    starter_weight : float, default 0.7
        Probability that an appearance goes to the starter when available.
    substitute_chance : float, default 0.15
        Probability that a match has a second appearance.
    available_players : sequence, default (0, 1, 2, 3)
        Player identifiers to draw from. The default is a tuple so that it is
        not a shared mutable default argument.
    generator : numpy.random.Generator, optional
        Source of randomness. When omitted, the legacy global ``np.random``
        state is used, which is what this function did before the parameter
        existed.

    Returns
    -------
    pandas.DataFrame
        Columns ``appearance_id``, ``match_id``, ``player_id`` and
        ``appearance_number`` (1-based within a match). The columns are
        present even when ``matches_df`` has no rows.
    """
    # np.random (legacy module) and Generator both provide .random() and .choice().
    source = np.random if generator is None else generator
    squad = list(available_players)
    appearances = []

    # Only match_id is needed, so iterate the column instead of whole rows.
    for match_id in matches_df["match_id"]:
        players_in_match = []  # players already used in this match

        num_appearances = 2 if source.random() < substitute_chance else 1

        for appearance_number in range(1, num_appearances + 1):
            available_pool = [p for p in squad if p not in players_in_match]
            if not available_pool:
                break  # no unused players left for this match

            # The random draw happens before the membership test, so the
            # number of draws per appearance does not depend on the pool.
            if source.random() < starter_weight and starter_id in available_pool:
                player_id = starter_id
            else:
                # Prefer non-starters; fall back to whoever is still unused.
                non_starter_pool = [p for p in available_pool if p != starter_id]
                player_id = source.choice(non_starter_pool if non_starter_pool else available_pool)

            players_in_match.append(player_id)
            appearances.append({
                "match_id": match_id,
                "player_id": player_id,
                "appearance_number": appearance_number,
            })

    # Explicit columns keep the schema stable when no appearance was generated.
    appearances_df = pd.DataFrame(appearances, columns=["match_id", "player_id", "appearance_number"])
    appearances_df.insert(0, "appearance_id", np.arange(len(appearances_df), dtype=int))
    return appearances_df

In [31]:
# Generate the appearance table for the fixtures created above.
appearances_df = generate_appearances(
    matches_df,
    starter_id=0,
    starter_weight=0.7,
    substitute_chance=0.15,
)
appearances_df

Unnamed: 0,appearance_id,match_id,player_id,appearance_number
0,0,0,0,1
1,1,1,1,1
2,2,2,0,1
3,3,2,2,2
4,4,3,0,1
5,5,4,0,1
6,6,5,0,1
7,7,6,0,1
8,8,7,0,1
9,9,8,0,1


In [32]:
# Sanity check: count rows per (match, player) pair and show any pair that
# occurs more than once. An empty result means no player appears twice in a match.
dup_check = (
    appearances_df
    .groupby(["match_id", "player_id"])
    .size()
    .reset_index(name="count")
)
dup_check.loc[dup_check["count"].gt(1)]

Unnamed: 0,match_id,player_id,count


# Shots Generation

In [35]:
# Load the player attribute table from the workbook and preview the first rows.
players_df = pd.read_excel(io="Synthetic Data.xlsx")
players_df.head(n=5)

Unnamed: 0,player_id,name,age,base_torso_x,base_torso_y,upper_arm_length,forearm_length,thigh_length,shin_length,head_vertical_offset,shoulder_horizontal_offset,shoulder_vertical_offset,hip_horizontal_offset,hip_vertical_offset,agility,presence,flexibility,reflexes
0,0,Pedro Farelo,29,13,15,3.5,3.0,6.0,4.5,2.5,2.0,1.5,1.5,3.0,90,85,85,80
1,1,Vasco Palmeirim,23,13,15,3.3,3.1,6.0,4.3,2.4,1.8,1.4,1.5,2.9,80,90,80,75
2,2,José Saramago,38,11,15,3.6,3.0,6.1,4.5,2.5,2.0,1.7,1.5,3.0,70,90,90,60
3,3,Eça de Queirós,31,13,14,3.5,2.9,6.2,4.6,2.6,2.0,1.5,1.5,3.1,75,65,70,70


In [33]:
def generate_shots(appearances_df, players_df, shots_per_appearance_range=(50, 75), generator=None):
    """Simulate the shots faced during each appearance.

    For every row of ``appearances_df`` a shot count is drawn uniformly from
    ``shots_per_appearance_range`` (both bounds inclusive). Each shot gets
    ``x`` and ``y`` sampled uniformly from ``[m, 27 - m)``, where
    ``m = presence * 0.01`` for the appearing player, and a ``velocity``
    sampled from a log-normal distribution (``mean=4.55``, ``sigma=0.16``).

    Parameters
    ----------
    appearances_df : pandas.DataFrame
        Must contain ``appearance_id``, ``match_id`` and ``player_id``.
    players_df : pandas.DataFrame
        Must contain ``player_id`` and ``presence``. If a ``player_id`` is
        repeated, its first row is used.
    shots_per_appearance_range : tuple of int, default (50, 75)
        Inclusive ``(low, high)`` bounds for the number of shots.
    generator : numpy.random.Generator, optional
        Source of randomness for the shot counts as well as the shot
        attributes. When omitted, the module-level ``rng`` is used for all
        draws, so seeding ``rng`` makes the whole output reproducible.

    Returns
    -------
    pandas.DataFrame
        Columns ``shot_id``, ``appearance_id``, ``match_id``, ``player_id``,
        ``x``, ``y`` and ``velocity``. The columns are present even when no
        shot was generated.

    Raises
    ------
    IndexError
        If an appearance references a ``player_id`` missing from ``players_df``.
    """
    source = rng if generator is None else generator
    low, high = shots_per_appearance_range

    # Resolve presence once instead of filtering players_df for every appearance.
    presence_by_player = (
        players_df
        .drop_duplicates(subset="player_id")
        .set_index("player_id")["presence"]
    )

    shots = []
    appearance_keys = appearances_df[["appearance_id", "match_id", "player_id"]]
    for appearance_id, match_id, player_id in appearance_keys.itertuples(index=False, name=None):
        if player_id not in presence_by_player.index:
            raise IndexError(f"player_id {player_id!r} not found in players_df")

        # Both axes share the same bounds: each edge is pulled inward by presence * 0.01.
        margin = presence_by_player.loc[player_id] * 0.01
        coord_min = 0 + margin
        coord_max = 27 - margin

        # Generator.integers excludes the upper bound, hence the + 1.
        num_shots = source.integers(low, high + 1)

        for _shot in range(num_shots):
            shots.append({
                "appearance_id": appearance_id,
                "match_id": match_id,
                "player_id": player_id,
                "x": source.uniform(coord_min, coord_max),
                "y": source.uniform(coord_min, coord_max),
                "velocity": source.lognormal(mean=4.55, sigma=0.16),
            })

    # Explicit columns keep the schema stable when there are no shots.
    shots_df = pd.DataFrame(
        shots,
        columns=["appearance_id", "match_id", "player_id", "x", "y", "velocity"],
    )
    shots_df.insert(0, "shot_id", np.arange(len(shots_df), dtype=int))
    return shots_df


In [40]:
# Generate shots for every appearance and preview the first 20 rows.
shots_df = generate_shots(appearances_df=appearances_df, players_df=players_df)
shots_df.head(n=20)

Unnamed: 0,shot_id,appearance_id,match_id,player_id,x,y,velocity
0,0,0,0,0,7.120633,17.37901,121.123561
1,1,0,0,0,26.149327,13.629128,104.561212
2,2,0,0,0,4.545165,9.87805,90.276675
3,3,0,0,0,25.743936,8.237526,85.457989
4,4,0,0,0,12.704846,4.167894,77.912958
5,5,0,0,0,2.593464,25.468946,73.921179
6,6,0,0,0,14.337214,10.560967,112.639504
7,7,0,0,0,9.350535,9.7499,83.648728
8,8,0,0,0,10.893455,9.569219,108.5087
9,9,0,0,0,1.678062,24.866253,97.700626


In [48]:
from physicsbasedposes import generate_pose, evaluate_save

ImportError: cannot import name 'evaluate_save' from 'physicsbasedposes' (c:\Users\joaod\Documents\GitHub\synthetic-goalkeeping-data\physicsbasedposes.py)

In [46]:
from physicsbasedposes import generate_pose, evaluate_save

# Nodes read from each generated pose; every node becomes a
# `<node>_x` / `<node>_y` column pair on shots_df.
BODY_NODES = [
    'torso', 'head',
    'left_shoulder', 'left_elbow', 'left_hand',
    'left_hip', 'left_knee', 'left_foot',
    'right_shoulder', 'right_elbow', 'right_hand',
    'right_hip', 'right_knee', 'right_foot'
]

# Initialize pose columns and isgoal column with placeholder values
for node in BODY_NODES:
    shots_df[f'{node}_x'] = 0.0
    shots_df[f'{node}_y'] = 0.0
shots_df['isgoal'] = False

# Shots whose pose generation or save evaluation raised; they keep the
# placeholder values above and are reported separately in the summary.
failed_shot_ids = []

# Generate poses and populate columns. Only the columns the pose functions
# need are iterated; `shot.Index` is the row label in shots_df.
shot_inputs = shots_df[['shot_id', 'player_id', 'x', 'y', 'velocity']]
for shot in shot_inputs.itertuples():
    shot_coords = [shot.x, shot.y]

    try:
        pose = generate_pose(shot.player_id, shot_coords, shot.velocity)

        # Evaluate if it's a goal
        save_result = evaluate_save(pose, shot_coords, radius=1.0)
        is_goal = not save_result['saved']

        # Collect every value before writing, so that a failure part-way
        # through never leaves a half-populated row behind.
        pose_values = {
            f'{node}_{axis}': pose[node][axis]
            for node in BODY_NODES
            for axis in ('x', 'y')
        }
    except Exception as e:
        print(f"Error processing shot {shot.shot_id}: {e}")
        failed_shot_ids.append(shot.shot_id)
        continue

    shots_df.at[shot.Index, 'isgoal'] = is_goal
    for column, value in pose_values.items():
        shots_df.at[shot.Index, column] = value

# Failed shots still carry isgoal=False, so they are subtracted from the
# save count instead of being reported as saves.
num_goals = int(shots_df['isgoal'].sum())
num_saves = len(shots_df) - num_goals - len(failed_shot_ids)

print(f"Added {len(BODY_NODES) * 2} pose columns and 'isgoal' column")
print(f"Goals: {num_goals}, Saves: {num_saves}")
if failed_shot_ids:
    print(f"Failed: {len(failed_shot_ids)} shots kept placeholder pose values and isgoal=False")

ImportError: cannot import name 'evaluate_save' from 'physicsbasedposes' (c:\Users\joaod\Documents\GitHub\synthetic-goalkeeping-data\physicsbasedposes.py)

In [None]:
# NOTE(review): generate_poses_for_shots is neither defined nor imported in the
# visible part of this notebook — confirm where it comes from before running.
# The recorded output for this cell (an AttributeError about `.head`) does not
# correspond to this line, so it is presumably stale — TODO confirm and re-run.
poses = generate_poses_for_shots(shots_df, players_df)

AttributeError: 'list' object has no attribute 'head'