# Notebook for Downloading Events of a Season

### Imports

In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from sportradar_datacore_api import HandballAPI
from sportradar_datacore_api.workflows.handball.competitions import (
    # list_competitions_df,
    get_competition_id_by_name,
)
from sportradar_datacore_api.workflows.handball.seasons import (
    list_seasons_df,
    get_season_id_by_name,
)
from sportradar_datacore_api.workflows.handball.games import (
    list_season_fixtures_df,
    fetch_fixture_events_df,
)



### Configuration

In [None]:
# Display names used further down to look up the competition and season IDs.
NAME_COMPETITION = "1. Handball-Bundesliga"
NAME_SEASON = "DAIKIN HBL 2024/25"

# NOTE(review): not referenced by any cell visible here; presumably a filter
# for a season-listing call — confirm before relying on it.
PARAMETERS_SEASONS = dict(startDate="2024-01-01")

# Per-fixture CSV files are written below this directory, which is resolved
# relative to the current working directory.
PATH_TO_OUTPUT = os.path.join(os.getcwd(), "..", "data", "season_24_25")

# Ensure the output directory exists (a no-op when it is already there).
os.makedirs(PATH_TO_OUTPUT, exist_ok=True)

### Initialize API

In [None]:
# Initialize API client with environment-based credentials
def initialize_api() -> HandballAPI:
    """Load credentials from the ``.env`` file and build the API client.

    ``load_dotenv`` is called with ``override=True``, so values from the file
    replace variables that are already set in the process environment.

    Environment variables read:
        BASE_URL, AUTH_URL, CLIENT_ID, CLIENT_SECRET: fall back to ``""``
            when unset.
        CLIENT_ORGANIZATION_ID: passed as ``None`` when unset (no fallback).

    Returns:
        HandballAPI: client constructed from the values above with the
        ``read:organization`` scope and ``sport="handball"``.
    """
    # Loading happens here (and only here) so that every call picks up the
    # current contents of the .env file.
    load_dotenv(".env", override=True)
    return HandballAPI(
        base_url=os.getenv("BASE_URL", ""),
        auth_url=os.getenv("AUTH_URL", ""),
        client_id=os.getenv("CLIENT_ID", ""),
        client_secret=os.getenv("CLIENT_SECRET", ""),
        org_id=os.getenv("CLIENT_ORGANIZATION_ID"),
        scopes=["read:organization"],
        sport="handball",
    )


api = initialize_api()

### Get wanted competition ID

In [None]:
# Resolve the competition ID from its configured display name.
competition_id = get_competition_id_by_name(api, NAME_COMPETITION)

# Stop right away when the lookup produced nothing usable (falsy result).
if not competition_id:
    raise ValueError(f"Competition '{NAME_COMPETITION}' not found.")

print(f"→ Competition '{NAME_COMPETITION}' -> {competition_id}")

### Get wanted season ID

In [None]:
# Resolve the season ID by name within the competition found above.
season_id = get_season_id_by_name(api, competition_id, NAME_SEASON)

# Stop right away when the lookup produced nothing usable (falsy result).
if not season_id:
    raise ValueError(f"Season '{NAME_SEASON}' not found in competition '{NAME_COMPETITION}'.")

print(f"→ Season '{NAME_SEASON}' -> {season_id}")

### Get the fixtures (matches) of a season

In [None]:
# Fetch all fixtures of the selected season as a DataFrame.
fixtures_df = list_season_fixtures_df(api, season_id)
print(f"Found {len(fixtures_df)} fixtures.")

# Order the fixtures by their local start time, then show a compact preview
# restricted to the identifying columns.
preview_columns = ["fixtureId", "nameLocal", "startTimeLocal", "roundNumber"]
fixtures_df = fixtures_df.sort_values("startTimeLocal")
display(fixtures_df[preview_columns])

### Define the columns to keep in the events DataFrame

In [None]:
# Columns retained from each fixture's events DataFrame before it is written
# to CSV; the download cells apply it as `events_df[columns_to_keep]`, so the
# order of this list is the column order of the output files.
# Commented-out entries are not selected; uncomment one to include it.
# NOTE(review): the names presumably mirror the flattened columns returned by
# fetch_fixture_events_df — confirm against its actual output.
# TODO: put in config or something
columns_to_keep = [
    # "clientId",
    # "clientType",
    "fixtureId",
    "organizationId",
    # "received",
    # "sport",
    # "topic",
    "type",
    "data.class",
    "data.eventId",
    "data.eventTime",
    "data.eventType",
    # "data.options.attendance",
    # "data.options.numberOfPeriods",
    # "data.options.periodLength",
    # "data.status",
    "data.subType",
    "data.timestamp",
    "data.entityId",
    # "data.options.active",
    "data.options.bib",
    # "data.options.captain",
    # "data.options.name",
    "data.options.position",
    "data.options.starter",
    # "data.personId",
    # "data.options.number",
    "data.periodId",
    "data.sequence",
    "score_home",
    "score_away",
    "data.playId",
    "data.clock",
    "data.options.goalKeeperId",
    "data.options.location",
    "data.success",
    "data.x",
    "data.y",
    "data.options.failureReason",
    "data.options.attackType",
    "data.options.value",
    "data.options.emptyNet",
    # "team.added",
    # "team.ageGroup",
    # "team.alternateVenueIds",
    # "team.codeLatin",
    "team.codeLocal",
    # "team.defaultVenueId",
    # "team.discipline",
    # "team.entityGroupId",
    "team.entityId",
    "team.externalId",
    # "team.gender",
    # "team.grade",
    # "team.historicalNames",
    # "team.internationalReference",
    # "team.nameFullLatin",
    "team.nameFullLocal",
    # "team.organizationId",
    # "team.representing",
    # "team.standard",
    # "team.status",
    # "team.updated",
    # "team.additionalNames.namePlaceLatin",
    # "team.additionalNames.namePlaceLocal",
    # "team.additionalNames.nameShortLatin",
    # "team.additionalNames.nameShortLocal",
    "team.colors.primary",
    "team.colors.secondary",
    "team.colors.tertiary",
    # "team.contacts.email",
    # "team.contacts.fax",
    # "team.contacts.phone",
    # "team.entityGroup.id",
    # "team.entityGroup.resourceType",
    # "team.organization.id",
    # "team.organization.resourceType",
    # "player.added",
    # "player.deceased",
    "player.dob",
    "player.externalId",
    "player.gender",
    # "player.historicalNames",
    # "player.languageLocal",
    # "player.nameAbbreviated",
    # "player.nameFamilyLatin",
    "player.nameFamilyLocal",
    # "player.nameFullLatin",
    # "player.nameFullLocal",
    # "player.nameGivenLatin",
    "player.nameGivenLocal",
    "player.nationality",
    # "player.organizationId",
    "player.personId",
    # "player.representing",
    # "player.status",
    # "player.updated",
    "player.additionalDetails.height",
    "player.additionalDetails.weight",
    # "player.organization.id",
    # "player.organization.resourceType",
    "team_home_abbr",
    "team_away_abbr",
    "team_home_id",
    "team_away_id",
    "team_home_name",
    "team_away_name",
    "gameday",
    "team_attacking_id",
    "team_attacking_name",
    "team_attacking_side",
]

### Get Play-by-Play (Event Timeline) for every match in the season

Simple sequential `for` loop over all fixtures. Use the next cell instead for faster, concurrent downloading with `concurrent.futures`.

In [None]:
# Sequentially download the event timeline of every fixture and store each
# one as its own CSV file in PATH_TO_OUTPUT.
for _, row in fixtures_df.iterrows():
    fid = row["fixtureId"]
    print(f"\nDownloading events for fixture {fid} ...")
    events_df = fetch_fixture_events_df(api, fixture_id=fid, debug=False)

    # A fixture without any events has no "gameday" value to read
    # (`.iloc[0]` on an empty frame raises), which would abort the whole
    # season download. Skip it explicitly and carry on.
    if events_df.empty:
        print(f"No events returned for fixture {fid}; skipping.")
        continue

    # Taken from the first row; used only to build the output file name.
    gameday = events_df["gameday"].iloc[0]

    # Restrict to the configured columns; copy() detaches the selection
    # from the full frame.
    events_df = events_df[columns_to_keep].copy()

    # TODO: write schema based on events_df columns (not implemented yet)

    # File name pattern: <local start date>_gd-<gameday>_id-<fixture id>.csv
    date_str = pd.to_datetime(row["startTimeLocal"]).strftime("%Y-%m-%d")
    out_path = os.path.join(
        PATH_TO_OUTPUT, f"{date_str}_gd-{gameday}_id-{fid}.csv"
    )

    # create path if it does not exist
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # "utf-8-sig" writes a byte-order mark at the start of the file.
    events_df.to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"Saved to {os.path.abspath(out_path)}")

print("\nAll fixtures processed.")

In [None]:
from concurrent.futures import ThreadPoolExecutor
import threading

def process_fixture(row: pd.Series) -> None:
    """Download the events of one fixture and write them to a CSV file.

    Args:
        row: One row of ``fixtures_df``; the ``"fixtureId"`` and
            ``"startTimeLocal"`` entries are read.

    Side effects:
        Writes ``<date>_gd-<gameday>_id-<fixtureId>.csv`` into
        ``PATH_TO_OUTPUT`` and prints progress messages. When no events are
        returned for the fixture, nothing is written and a note is printed.

    Raises:
        Whatever ``fetch_fixture_events_df`` raises, and ``KeyError`` when a
        column listed in ``columns_to_keep`` is missing from the result.
    """
    fid = row["fixtureId"]
    print(f"Downloading events for fixture {fid} ...")
    events_df = fetch_fixture_events_df(api, fixture_id=fid, debug=False)

    # A fixture without any events has no "gameday" value to read
    # (`.iloc[0]` on an empty frame raises); skip it instead of failing.
    if events_df.empty:
        print(f"No events returned for fixture {fid}; skipping.")
        return

    # Taken from the first row; used only to build the output file name.
    gameday = events_df["gameday"].iloc[0]

    # Restrict to the configured columns; copy() detaches the selection
    # from the full frame.
    events_df = events_df[columns_to_keep].copy()

    # TODO: write schema based on events_df columns (not implemented yet)
    date_str = pd.to_datetime(row["startTimeLocal"]).strftime("%Y-%m-%d")
    out_path = os.path.join(
        PATH_TO_OUTPUT, f"{date_str}_gd-{gameday}_id-{fid}.csv"
    )

    # create path if it does not exist
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # "utf-8-sig" writes a byte-order mark at the start of the file.
    events_df.to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"Saved to {os.path.abspath(out_path)}")

# Fan the per-fixture downloads out over a thread pool (the work is dominated
# by API calls and file writes).
# NOTE: print_lock only guards the prints issued from this cell; prints made
# inside process_fixture are not wrapped by it.
print_lock = threading.Lock()
failed_fixture_ids = []
with ThreadPoolExecutor(max_workers=16) as executor:
    # Keep the rows alongside their futures so that a failure can be
    # attributed to a concrete fixture.
    rows = [row for _, row in fixtures_df.iterrows()]
    futures = [executor.submit(process_fixture, row) for row in rows]
    for row, future in zip(rows, futures):
        fid = row["fixtureId"]
        try:
            future.result()  # Blocks until this fixture is done; re-raises its error
        except Exception as e:
            # Boundary handler: one failing fixture must not cancel the rest.
            failed_fixture_ids.append(fid)
            with print_lock:
                print(f"Error processing fixture {fid}: {e}")

print("\nAll fixtures processed.")
if failed_fixture_ids:
    print(f"{len(failed_fixture_ids)} fixture(s) failed: {failed_fixture_ids}")