In [None]:
import pandas as pd
import numpy as np

In [None]:
# Fixed seed so that Restart Kernel -> Run All regenerates the same synthetic
# tables every time; change SEED to obtain a different (still repeatable) draw.
SEED = 42
rng = np.random.default_rng(SEED)

# Shots Table

In [None]:
# Read the worksheet at position 1 (sheet_name is a 0-based position when it is
# an int) of the template workbook, then preview its first rows.
df = pd.read_excel(io="Synthetic Data.xlsx", sheet_name=1)
df.head(n=5)

In [None]:
def generate_synthetic(df, n_rows, random_state=None, appearance_high=25, numeric_high=27):
    """Build a synthetic table shaped like ``df`` plus a ``timestamp`` column.

    Numeric columns of ``df`` are filled with uniformly drawn integers (the
    template's own numeric values are not used). Non-numeric columns are
    resampled from the values observed in ``df`` using their observed relative
    frequencies (missing values are not counted). Finally a ``timestamp``
    column is written (replacing any column of that name) with whole-second
    instants drawn uniformly from the half-open window
    [2026-01-01 00:00:00, 2026-01-01 01:00:00).

    Parameters
    ----------
    df : pandas.DataFrame
        Template frame; supplies the column names, the numeric/non-numeric
        split and the category frequencies.
    n_rows : int
        Number of synthetic rows to generate.
    random_state : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (default) uses the notebook-level
        ``rng`` generator, exactly as before; an int seed or a Generator makes
        the call self-contained and repeatable.
    appearance_high : int, optional
        Exclusive upper bound for the ``"appearance"`` column (values are
        drawn from ``0 .. appearance_high - 1``).
    numeric_high : int, optional
        Exclusive upper bound for every other numeric column.

    Returns
    -------
    pandas.DataFrame
        ``n_rows`` rows: numeric columns first, then non-numeric columns, then
        ``timestamp``.
    """
    # Fall back to the notebook-wide generator only when the caller did not
    # supply one; default_rng passes an existing Generator through unchanged.
    gen = rng if random_state is None else np.random.default_rng(random_state)

    synthetic = pd.DataFrame(index=range(n_rows))
    start = pd.Timestamp("2026-01-01 00:00:00")
    end = pd.Timestamp("2026-01-01 01:00:00")

    for col in df.select_dtypes(include=np.number):
        high = appearance_high if col == "appearance" else numeric_high
        synthetic[col] = gen.integers(high, size=n_rows)

    for col in df.select_dtypes(exclude=np.number):
        freq = df[col].value_counts(normalize=True)
        if freq.empty:
            # No non-null value was observed, so there is nothing to sample
            # from (choice() rejects an empty population); keep the column
            # but leave it missing.
            synthetic[col] = None
            continue
        synthetic[col] = gen.choice(
            freq.index,
            size=n_rows,
            p=freq.values,
        )

    # Whole-second offsets inside [start, end): 00:00:00 up to 00:59:59.
    window_seconds = int((end - start).total_seconds())
    random_seconds = gen.integers(0, window_seconds, size=n_rows)
    synthetic["timestamp"] = start + pd.to_timedelta(random_seconds, unit="s")

    return synthetic

In [None]:
# Draw 500 synthetic shots from the template frame and preview the first 20.
synth = generate_synthetic(df, 500)
synth.head(n=20)

In [None]:
# Number of synthetic shots per appearance value, largest count first.
(
    synth["appearance"]
    .value_counts()
    .sort_values(ascending=False)
)

# Appearances Table

In [None]:
# One row per appearance value: the sum of `isgoal` over that appearance's
# shots, stored under the column name `total_goals`.
app_df = (
    synth
    .groupby("appearance")["isgoal"]
    .sum()
    .rename("total_goals")
    .reset_index()
)
app_df.info()

In [None]:
# Attach a player and a match to every appearance row.
n_players = 4
player_ids = np.arange(n_players)

# Unequal selection weights, rescaled so they sum to 1 before being used as
# probabilities.
player_probs = np.array([0.5, 0.2, 0.2, 0.1], dtype=float)
player_probs /= player_probs.sum()

# The player draw happens before the match draw, so the generator is consumed
# in the same order on every run.
app_df["player_id"] = rng.choice(player_ids, size=app_df.shape[0], p=player_probs)
app_df["match_id"] = rng.integers(0, 23, size=app_df.shape[0])  # ids 0..22

In [None]:
# How many appearance rows were assigned to each player id.
app_df.loc[:, "player_id"].value_counts()

In [None]:
# Preview the first 10 appearance rows, now including player_id and match_id.
app_df.head(n=10)

# Players Table

In [None]:
# One row per player_id: `total_goals` summed over that player's appearances.
player_df = (
    app_df
    .groupby("player_id")["total_goals"]
    .sum()
    .to_frame("total_goals")
    .reset_index()
)
player_df.info()

In [None]:
# Preview the first rows of the per-player totals.
player_df.head(n=5)

# Matches Table

In [None]:
# One row per match_id: `total_goals` summed over the appearances assigned to
# that match, stored under the column name `conceded`.
match_df = (
    app_df
    .groupby("match_id")["total_goals"]
    .sum()
    .rename("conceded")
    .reset_index()
)
match_df.head(n=10)

In [None]:
# Write the four tables into one workbook, one worksheet per table, without
# the row index.
workbook_sheets = (
    ("Shots", synth),
    ("Appearances", app_df),
    ("Players", player_df),
    ("Matches", match_df),
)
with pd.ExcelWriter("output.xlsx") as writer:
    for sheet_label, table in workbook_sheets:
        table.to_excel(writer, sheet_name=sheet_label, index=False)