In [3]:
import pandas as pd
import numpy as np

In [4]:
rng = np.random.default_rng()

In [18]:
from physicsbasedposes import generate_pose, evaluate_save, pose_to_dataframe

# Matches Generation

In [6]:
def generate_matches(opponents_array, num_games=10, start_date="2025-01-15", freq="W"):
    dates = pd.date_range(start_date, periods=num_games, freq=freq)
    selected_opponents = np.random.choice(opponents_array, size=num_games)
    
    matches = pd.DataFrame({
        "date": dates,
        "opponent": selected_opponents
    })
    
    return matches

In [7]:
opponents = ["Fábrica de Azulejos Coentrão", "Comunidade Judaica de Vila Real", "Sporting Clube de Cascos de Rolha", "Associação Desportiva de Cascos de Rolha"]
matches_df = generate_matches(opponents, num_games=10, freq="W")
matches_df.insert(0, "match_id", np.arange(len(matches_df), dtype=int))
matches_df

Unnamed: 0,match_id,date,opponent
0,0,2025-01-19,Sporting Clube de Cascos de Rolha
1,1,2025-01-26,Comunidade Judaica de Vila Real
2,2,2025-02-02,Fábrica de Azulejos Coentrão
3,3,2025-02-09,Sporting Clube de Cascos de Rolha
4,4,2025-02-16,Sporting Clube de Cascos de Rolha
5,5,2025-02-23,Comunidade Judaica de Vila Real
6,6,2025-03-02,Sporting Clube de Cascos de Rolha
7,7,2025-03-09,Sporting Clube de Cascos de Rolha
8,8,2025-03-16,Sporting Clube de Cascos de Rolha
9,9,2025-03-23,Fábrica de Azulejos Coentrão


# Appearances Generation

In [8]:
def generate_appearances(matches_df, starter_id=0, starter_weight=0.7, substitute_chance=0.15, available_players=[0, 1, 2, 3]):
    appearances = []

    for _, match in matches_df.iterrows():
        match_id = match["match_id"]
        players_in_match = []  # Track players already used in this match

        num_appearances = 2 if np.random.random() < substitute_chance else 1

        for appearance_num in range(num_appearances):
            # Pool of players not yet used in this match
            available_pool = [p for p in available_players if p not in players_in_match]
            if not available_pool:
                break  # No unique players left

            # Prefer starter if available and weighted
            prefer_starter = (np.random.random() < starter_weight) and (starter_id in available_pool)
            if prefer_starter:
                player_id = starter_id
            else:
                # Choose from non-starters first; fall back to any available if necessary
                non_starter_pool = [p for p in available_pool if p != starter_id]
                choice_pool = non_starter_pool if non_starter_pool else available_pool
                player_id = np.random.choice(choice_pool)

            players_in_match.append(player_id)
            appearances.append({
                "match_id": match_id,
                "player_id": player_id,
                "appearance_number": appearance_num + 1
            })

    appearances_df = pd.DataFrame(appearances)
    appearances_df.insert(0, "appearance_id", np.arange(len(appearances_df), dtype=int))
    return appearances_df

In [9]:
appearances_df = generate_appearances(matches_df, starter_id=0, starter_weight=0.7, substitute_chance=0.15)
appearances_df

Unnamed: 0,appearance_id,match_id,player_id,appearance_number
0,0,0,3,1
1,1,1,3,1
2,2,1,0,2
3,3,2,0,1
4,4,3,0,1
5,5,4,1,1
6,6,5,2,1
7,7,6,0,1
8,8,7,0,1
9,9,8,0,1


In [10]:
dup_check = appearances_df.groupby(["match_id", "player_id"]).size().reset_index(name="count")
dup_check[dup_check["count"] > 1]

Unnamed: 0,match_id,player_id,count


# Shots Generation

In [11]:
players_df = pd.read_excel("Synthetic Data.xlsx")
players_df.head()

Unnamed: 0,player_id,name,age,base_torso_x,base_torso_y,upper_arm_length,forearm_length,thigh_length,shin_length,head_vertical_offset,shoulder_horizontal_offset,shoulder_vertical_offset,hip_horizontal_offset,hip_vertical_offset,agility,presence,flexibility,reflexes
0,0,Pedro Farelo,29,13,15,3.5,3.0,6.0,4.5,2.5,2.0,1.5,1.5,3.0,90,85,85,80
1,1,Vasco Palmeirim,23,13,15,3.3,3.1,6.0,4.3,2.4,1.8,1.4,1.5,2.9,80,90,80,75
2,2,José Saramago,38,11,15,3.6,3.0,6.1,4.5,2.5,2.0,1.7,1.5,3.0,70,90,90,60
3,3,Eça de Queirós,31,13,14,3.5,2.9,6.2,4.6,2.6,2.0,1.5,1.5,3.1,75,65,70,70


In [12]:
def generate_shots(appearances_df, players_df, shots_per_appearance_range=(50, 75)):
    shots = []
    start_time=pd.Timestamp("2026-01-01 00:00")
    
    for _, appearance in appearances_df.iterrows():
        appearance_id = appearance["appearance_id"]
        player_id = appearance["player_id"]
        
        player = players_df[players_df["player_id"] == player_id].iloc[0]
        presence = player["presence"]
        
        x_min = 0 + presence * 0.01
        x_max = 27 - presence * 0.01
        y_min = 0 + presence * 0.01
        y_max = 27 - presence * 0.01
        
        num_shots = np.random.randint(shots_per_appearance_range[0], shots_per_appearance_range[1] + 1)
        
        for _ in range(num_shots):
            shots.append({
                "appearance_id": appearance_id,
                "match_id": appearance["match_id"],
                "player_id": player_id,
                "x": rng.uniform(x_min, x_max),
                "y": rng.uniform(y_min, y_max),
                "velocity": rng.lognormal(mean=4.55, sigma=0.16),
                "timestamp": start_time + pd.to_timedelta(rng.uniform(0, 3600),unit="s")
            })
    
    shots_df = pd.DataFrame(shots)
    shots_df.insert(0, "shot_id", np.arange(len(shots_df), dtype=int))
    return shots_df

In [None]:
shots_df = generate_shots(appearances_df, players_df)
shots_df.head(15)

Unnamed: 0,shot_id,appearance_id,match_id,player_id,x,y,velocity,timestamp
0,0,0,0,3,1.199349,2.920744,74.297915,2026-01-01 00:09:38.906051919
1,1,0,0,3,21.14402,15.826254,101.184202,2026-01-01 00:42:14.164854669
2,2,0,0,3,3.532768,12.605381,64.068811,2026-01-01 00:49:40.170411242
3,3,0,0,3,24.575365,2.633133,76.007628,2026-01-01 00:56:26.122701596
4,4,0,0,3,11.103867,23.43241,84.270891,2026-01-01 00:31:25.999133496
5,5,0,0,3,2.301567,1.982534,107.140765,2026-01-01 00:11:02.654356210
6,6,0,0,3,17.025762,21.171631,78.419812,2026-01-01 00:44:17.295902631
7,7,0,0,3,4.6775,24.414911,84.47404,2026-01-01 00:15:56.187171736
8,8,0,0,3,7.994096,22.863094,84.056164,2026-01-01 00:42:10.269300955
9,9,0,0,3,2.140233,22.128941,87.624323,2026-01-01 00:01:28.457517808


In [22]:
results = []
for _, shot in shots_df.iterrows():
    pose = generate_pose(shot['player_id'], [shot['x'], shot['y']], shot['velocity'])
    
    flat_pose = {}
    for joint_name, coords in pose.items():
        flat_pose[f'{joint_name}_x'] = coords['x']
        flat_pose[f'{joint_name}_y'] = coords['y']
    
    pose_df = pose_to_dataframe(pose)
    eval_result = evaluate_save(pose_df, [shot['x'], shot['y']], radius=1.0)
    
    combined = {**flat_pose, **eval_result}
    results.append(combined)

shots_df = pd.concat([shots_df, pd.DataFrame(results)], axis=1)
shots_df.head(15)

Unnamed: 0,shot_id,appearance_id,match_id,player_id,x,y,velocity,timestamp,torso_x,torso_y,...,right_hip_x,right_hip_y,right_knee_x,right_knee_y,right_foot_x,right_foot_y,nearest_node,distance,radius,saved
0,0,0,0,3,1.199349,2.920744,74.297915,2026-01-01 00:09:38.906051919,9.55065,10.759987,...,11.05065,7.659987,16.654097,5.006427,12.268122,3.619624,left_foot,3.236553,1.0,False
1,1,0,0,3,21.14402,15.826254,101.184202,2026-01-01 00:42:14.164854669,14.710301,14.364132,...,16.210301,11.264132,12.458702,6.32799,16.609648,4.345657,right_hand,1.471743,1.0,False
2,2,0,0,3,3.532768,12.605381,64.068811,2026-01-01 00:49:40.170411242,9.789152,13.505692,...,11.289152,10.405692,16.731605,7.435894,12.651189,5.31217,left_foot,1.769154,1.0,False
3,3,0,0,3,24.575365,2.633133,76.007628,2026-01-01 00:56:26.122701596,16.258419,10.750723,...,17.758419,7.650723,23.871418,8.685728,22.145021,4.421979,right_foot,3.017704,1.0,False
4,4,0,0,3,11.103867,23.43241,84.270891,2026-01-01 00:31:25.999133496,12.48989,16.388203,...,13.98989,13.288203,9.727198,8.786042,13.478953,6.1244,left_hand,1.934416,1.0,False
5,5,0,0,3,2.301567,1.982534,107.140765,2026-01-01 00:11:02.654356210,10.822148,11.556718,...,12.322148,8.456718,17.972668,5.904929,13.642814,4.351749,left_foot,3.07326,1.0,False
6,6,0,0,3,17.025762,21.171631,78.419812,2026-01-01 00:44:17.295902631,14.081805,15.9467,...,15.581805,12.8467,11.643479,8.058215,15.640979,5.782255,right_hand,1.286446,1.0,False
7,7,0,0,3,4.6775,24.414911,84.47404,2026-01-01 00:15:56.187171736,10.850879,16.63316,...,12.350879,13.53316,8.389629,8.763621,12.30635,6.351298,left_hand,3.285717,1.0,False
8,8,0,0,3,7.994096,22.863094,84.056164,2026-01-01 00:42:10.269300955,11.69101,16.248341,...,13.19101,13.148341,9.423555,8.224289,13.433562,5.970438,left_hand,1.747677,1.0,False
9,9,0,0,3,2.140233,22.128941,87.624323,2026-01-01 00:01:28.457517808,10.30295,15.975129,...,11.80295,12.875129,7.903029,8.055314,11.850283,5.693285,left_hand,3.466502,1.0,False


In [21]:
print(f"Goals: {shots_with_full_data_df['saved'].sum()}, Saves: {(~shots_with_full_data_df['saved']).sum()}")

Goals: 234, Saves: 449
