# Notebook for Downloading Events of a Season

### Imports

In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from sportradar_datacore_api import HandballAPI



### Configuration

In [None]:
# Competition to look up; sent to the API as the "nameLatinContains" filter.
NAME_COMPETITION = "1. Handball-Bundesliga"

# Human-readable name of the season being exported.
NAME_SEASON = "DAIKIN HBL 2024/25"

# Query parameters forwarded unchanged to the seasons request.
PARAMETERS_SEASONS = {
    "startDate": "2024-01-01",
}

### Initialize API

In [None]:
# Read the credentials from the local ".env" file. override=True lets values
# from the file replace variables already present in the process environment.
load_dotenv(".env", override=True)

# Build the API client from the environment. Every setting except the
# organization ID falls back to an empty string when its variable is unset;
# the organization ID is passed as None in that case.
api = HandballAPI(
    base_url=os.environ.get("BASE_URL", ""),
    auth_url=os.environ.get("AUTH_URL", ""),
    client_id=os.environ.get("CLIENT_ID", ""),
    client_secret=os.environ.get("CLIENT_SECRET", ""),
    org_id=os.environ.get("CLIENT_ORGANIZATION_ID"),
    scopes=["read:organization"],
    sport="handball",
)

### Look up the competition ID

In [None]:
# Ask the API for competitions whose Latin name contains NAME_COMPETITION.
competition = api.get_competitions(params={"nameLatinContains": NAME_COMPETITION})
display(competition)

# `or []` covers a missing key as well as an explicit empty list / None.
# Indexing an empty "data" list directly would raise IndexError and hide the
# intended "not found" error below.
competitions_found = competition.get("data") or []
competition_id = (
    competitions_found[0].get("competitionId") if competitions_found else None
)

# Stop early with a clear message if the competition was not found
if not competition_id:
    raise ValueError(f"Competition '{NAME_COMPETITION}' not found.")
print(f"Competition ID: {competition_id}")

### Look up the season ID

In [None]:
# Fetch the seasons of the selected competition, filtered by PARAMETERS_SEASONS.
season = api.get_seasons(competition_id=competition_id, params=PARAMETERS_SEASONS)
display(season)

# `or []` covers a missing key as well as an explicit empty list / None.
# Indexing an empty "data" list directly would raise IndexError and hide the
# intended "not found" error below.
seasons_found = season.get("data") or []

# Prefer the record that carries NAME_SEASON as one of its values, so the
# configured name actually drives the selection. The name field of a season
# record is not visible from this notebook, hence the schema-agnostic match.
# NOTE(review): confirm the name field and match on it explicitly.
seasons_named = [s for s in seasons_found if NAME_SEASON in s.values()]
if seasons_found and not seasons_named:
    # Fall back to the first season returned, as before.
    print(f"No season named '{NAME_SEASON}' in the response; using the first one returned.")

season_id = (seasons_named or seasons_found or [{}])[0].get("seasonId")

# Stop early with a clear message if the season was not found
if not season_id:
    raise ValueError(f"Season '{NAME_SEASON}' not found.")
print(f"Season ID: {season_id}")

In [None]:
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON:
    """Show JSON data as a collapsible tree in notebook output.

    The data is serialized once in the constructor; rendering is done in the
    browser by the renderjson.js script, loaded through ``require``.

    Args:
        json_data: Either a JSON document as a string, or any object that
            ``json.dumps`` can serialize.

    Raises:
        ValueError: If ``json_data`` is a string that is not valid JSON. The
            original ``json.JSONDecodeError`` is attached as ``__cause__``.
    """

    # The script used to be loaded from rawgit.com, a service that has been
    # retired; jsDelivr serves files from the same GitHub repository.
    # NOTE(review): confirm this URL loads in the target notebook frontend.
    _RENDERJSON_URL = "https://cdn.jsdelivr.net/gh/caldwell/renderjson@master/renderjson.js"

    def __init__(self, json_data):
        if isinstance(json_data, str):
            try:
                parsed = json.loads(json_data)
            except json.JSONDecodeError as err:
                # Chain the cause so the position of the syntax error is kept.
                raise ValueError("Invalid JSON string") from err
        else:
            parsed = json_data
        self.json_str = json.dumps(parsed)
        # Unique id of the <div> that will receive the rendered tree.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit the placeholder <div> and the script that fills it."""
        display_html(
            '<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid),
            raw=True,
        )
        display_javascript(
            """
        require(["%s"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self._RENDERJSON_URL, self.uuid, self.json_str),
            raw=True,
        )

### Get the fixtures (matches) of a season

In [None]:
import json
from IPython.display import JSON

# Request the fixtures of the season together with the related resources.
season_fixtures = api.get_season_fixtures(
    season_id,
    params={
        "include": "entities,organizations,persons,fixtures,competitions,seasons",
        "external": "entityId,personId",
        # "fields":"dob,firstName,organization(id),organizations[name], teams[name,details/metrics/*,tags(id)]",
        "limit": 1000,
    },
)

# Summarize the response: every top-level key, plus the size of list/dict values
print("Keys in the response:")
for key, value in season_fixtures.items():
    print(key)
    if isinstance(value, list):
        print(f"  Length list: {len(value)}")
    elif isinstance(value, dict):
        print(f"  Length dict: {len(value)}")

# Report what was delivered under the top-level "includes" key, if anything
if "includes" not in season_fixtures:
    print("No 'includes' key found at top level.")
else:
    print("Includes present at top level.")
    print("Includes keys:", list(season_fixtures["includes"].keys()))
    if "resources" in season_fixtures["includes"]:
        print("Resources found in includes:")
        for resource_key, resource_value in season_fixtures["includes"]["resources"].items():
            print(f"  {resource_key}: {len(resource_value)} items")


In [None]:
# Flatten the fixture records into one row per fixture; nested keys are
# joined with "_".
df_season_fixtures_data = pd.json_normalize(season_fixtures["data"], sep="_")

# Entities arrive as a mapping under includes -> resources -> entities.
# Build one row per entry and keep the mapping key in a "key" column.
season_entities = (
    season_fixtures.get("includes", {}).get("resources", {}).get("entities", {})
)
df_season_fixtures_includes_resources_entities = pd.DataFrame(
    [{"key": key, **value} for key, value in season_entities.items()]
)

print("season_fixtures['data']")
display(df_season_fixtures_data)

print("season_fixtures['includes']['resources']['entities']")
display(df_season_fixtures_includes_resources_entities)



### Get Play-by-Play (Event Timeline) for every match in the season

In [None]:
play_by_play_exports = []

# DataFrame.to_csv does not create missing directories, so make sure the
# export folder exists before the first fixture is written.
output_dir = "../data"
os.makedirs(output_dir, exist_ok=True)

# One iteration per fixture of the season: fetch its events, enrich them with
# team/player names and home/away information, and write one CSV per fixture.
for index, row in df_season_fixtures_data.iterrows():
    fixture_id = row["fixtureId"]

    # Event timeline of the fixture, requested with "withScores" so that an
    # event can carry a "scores" mapping.
    fixture_playbyplay_export = api.get_match_events_export(fixture_id, params={
        "include": "entities,organizations,persons,fixtures,competitions,seasons",
        "external": "entityId,personId",
        "withScores": "true",
    })

    # `or []` avoids the phantom empty event a `[{}]` default would create and
    # also covers an explicit empty list / None.
    fixture_timeline = fixture_playbyplay_export.get("data") or []
    if not fixture_timeline:
        print(f"No events returned for fixture {fixture_id}; skipping.")
        continue

    # Fixture details: competitors (home/away) and the included entities.
    fixture = api.get_fixture(fixture_id, params={
        "include": "entities,organizations,persons",
        "external": "entityId,personId",
    })
    df_fixture = pd.json_normalize(fixture["data"], sep="_")
    # Not used below; kept so it can be inspected after the loop.
    df_fixture_include = pd.json_normalize(fixture.get("includes", {}).get("resources", {}), sep="_")
    # print("df_fixture")
    # display(df_fixture)
    # print("df_fixture_include")
    # display(df_fixture_include)

    # Play-by-play response; only its included persons are used below.
    fixture_playbyplay = api.get_playbyplay(fixture_id, params={
        "include": "entities,organizations,persons,fixtures,competitions,seasons",
        "external": "entityId,personId",
    })

    df_fixture_timeline = pd.json_normalize(fixture_timeline, sep="_")
    print("df_fixture_timeline")
    display(df_fixture_timeline.head(5))

    # Entity IDs referenced by the events (NaN removed); the column is absent
    # when no event carries an entity ID.
    if "data_entityId" in df_fixture_timeline.columns:
        list_entities_match = df_fixture_timeline["data_entityId"].dropna().unique().tolist()
    else:
        list_entities_match = []

    # Season-level entity rows that take part in this fixture
    df_match_entities = df_season_fixtures_includes_resources_entities[
        df_season_fixtures_includes_resources_entities["entityId"].isin(list_entities_match)
    ]
    # Unravel nested JSON values in the columns
    df_match_entities = pd.json_normalize(df_match_entities.to_dict(orient="records"), sep="_")
    display(df_match_entities.head(5))

    fixture_gameday = row.get("roundNumber", "")

    # Flatten each event: top-level keys as they are, "data" keys prefixed with
    # "data.", "data.options" keys prefixed with "data.options."; the scores
    # mapping is kept whole in "raw_scores".
    events = []
    for event in fixture_timeline:
        event_data = event.get("data") or {}
        event_options = event_data.get("options") or {}
        flat = {
            **{k: v for k, v in event.items() if k not in ("data", "scores")},
            **{f"data.{k}": v for k, v in event_data.items()},
            **{f"data.options.{k}": v for k, v in event_options.items()},
        }
        flat["raw_scores"] = event.get("scores", {})
        events.append(flat)

    df = pd.DataFrame(events)

    # Teams of this fixture, from the fixture response
    df_entities = pd.DataFrame.from_dict(
        fixture.get("includes", {}).get("resources", {}).get("entities", {}),
        orient="index",
    )
    df_entities["abbreviation"] = df_entities["codeLocal"]

    # Players, from the play-by-play response
    persons = fixture_playbyplay.get("includes", {}).get("resources", {}).get("persons", {})
    df_persons = pd.DataFrame.from_dict(persons, orient="index")

    df = df.merge(
        df_entities[["entityId", "nameFullLocal", "abbreviation"]],
        left_on="data.entityId",
        right_on="entityId",
        how="left",
    )
    # Both sides now carry "nameFullLocal"; the suffixes tell them apart.
    df = df.merge(
        df_persons[["personId", "nameFullLocal"]],
        left_on="data.personId",
        right_on="personId",
        how="left",
        suffixes=("_entity", "_person"),
    )

    df = df.rename(columns={
        "nameFullLocal_entity": "team_name",
        "nameFullLocal_person": "player_name",
        "abbreviation": "team_abbreviation",
    })

    competitors = df_fixture.loc[0, "competitors"]
    # A default for next() turns a missing competitor into a readable error
    # instead of a bare StopIteration.
    home_team = next((c for c in competitors if c.get("isHome")), None)
    away_team = next((c for c in competitors if not c.get("isHome")), None)
    if home_team is None or away_team is None:
        raise ValueError(f"Fixture {fixture_id} has no home/away competitor pair.")

    home_team_id = home_team["entityId"]
    away_team_id = away_team["entityId"]

    home_team_name = df_entities.loc[df_entities["entityId"] == home_team_id, "nameFullLocal"].iloc[0]
    away_team_name = df_entities.loc[df_entities["entityId"] == away_team_id, "nameFullLocal"].iloc[0]
    home_team_abbr = df_entities.loc[df_entities["entityId"] == home_team_id, "abbreviation"].iloc[0]
    away_team_abbr = df_entities.loc[df_entities["entityId"] == away_team_id, "abbreviation"].iloc[0]

    df["team_home_abbr"] = home_team_abbr
    df["team_away_abbr"] = away_team_abbr
    df["team_home_id"] = home_team_id
    df["team_home_name"] = home_team_name
    df["team_away_id"] = away_team_id
    df["team_away_name"] = away_team_name
    df["team_attacking_id"] = df["data.entityId"]
    df["team_attacking_name"] = df["team_name"]
    df["gameday"] = fixture_gameday

    # Per-event score of each side, looked up by team ID in the scores mapping
    df["score_home"] = df["raw_scores"].apply(
        lambda scores: scores.get(home_team_id) if isinstance(scores, dict) else None
    )
    df["score_away"] = df["raw_scores"].apply(
        lambda scores: scores.get(away_team_id) if isinstance(scores, dict) else None
    )
    df["attacking_side"] = df["team_attacking_id"].apply(
        lambda x: "home" if x == home_team_id else "away" if x == away_team_id else "unknown"
    )

    # Save to CSV with the naming scheme
    # YYYY-MM-DD_gameday_GAMEDAY_id_FIXTURE-ID_HOME_AWAY.csv
    # A missing value shows up as NaN in the row, so guard both conversions.
    start_time = row.get("startTimeLocal", "")
    fixture_date = start_time.split("T")[0] if isinstance(start_time, str) else ""
    has_gameday = pd.notna(fixture_gameday) and fixture_gameday != ""
    fixture_gameday_str = f"{int(fixture_gameday) if has_gameday else 0:02}"

    # Hyphens (also those inside an abbreviation) become underscores.
    fixture_abbr = f"{home_team_abbr}-{away_team_abbr}".replace("-", "_")

    name_file = f"{fixture_date}_gameday_{fixture_gameday_str}_id_{fixture_id}_{fixture_abbr}.csv"
    path = os.path.join(output_dir, name_file)
    df.to_csv(path, index=False)
    print(f"Fixture data saved to {path}")

In [None]:
# for fixture in season_fixtures["data"]:
#     id_fixture = fixture.get("fixtureId")
#     # Check if the fixture was found
#     if not id_fixture:
#         raise ValueError(f"Fixture not found.")
#     else:
#         print(f"Fixture ID: {id_fixture}")

#     querystring = {
#         "external": "entityId,personId",
#         # "fields": "dob,firstName,organization(id),organizations[name], teams[name,details/metrics/*,tags(id)]",
#         "hideNull": "true",
#         "include": "organizations,fixtures,entities",
#         "withScores": "true",
#     }

#     # Get fixture details
#     fixture_playbyplay_export = api.get_match_events_export(
#         id_fixture,
#         params={
#             "include": "entities,organizations,fixtures,persons",
#             "external": "entityId,personId",
#             "withScores": "true",
#         },
#         # params=querystring,
#     )

#     fixture_playbyplay = api.get_playbyplay(id_fixture, params={
#         "include": "entities,organizations,persons",
#         "external": "entityId,personId",
#     })

#     # Check if the fixture details were found
#     if not fixture_playbyplay_export:
#         raise ValueError(f"Fixture details not found.")

#     # Extract key fields into a flattened structure for easier inspection
#     fixture_data = fixture_playbyplay_export.get("data", [{}])
#     fixture_includes = fixture_playbyplay.get("includes", {})

#     fixture_gameday = fixture.get("roundNumber", {})

#     events = []
#     for event in fixture_data:
#         scores = event.get("scores", {})

#         flat = {
#             **{k: v for k, v in event.items() if k not in ["data", "scores"]},
#             **{f"data.{k}": v for k, v in event.get("data", {}).items()},
#             **{
#                 f"data.options.{k}": v
#                 for k, v in event.get("data", {}).get("options", {}).items()
#             },
#         }

#         flat["raw_scores"] = scores
#         events.append(flat)

#     df = pd.DataFrame(events)

#     entities = fixture_includes.get("resources", {}).get("entities", {})
#     df_entities = pd.DataFrame.from_dict(entities, orient="index")
#     df_entities["entityId"] = df_entities["entityId"]
#     df_entities["abbreviation"] = df_entities["codeLocal"]

#     persons = fixture_includes.get("resources", {}).get("persons", {})
#     df_persons = pd.DataFrame.from_dict(persons, orient="index")
#     df_persons["personId"] = df_persons["personId"]

#     df = df.merge(
#         df_entities[["entityId", "nameFullLocal", "abbreviation"]],
#         left_on="data.entityId",
#         right_on="entityId",
#         how="left"
#     )
#     df = df.merge(
#         df_persons[["personId", "nameFullLocal"]],
#         left_on="data.personId",
#         right_on="personId",
#         how="left",
#         suffixes=("_entity", "_person"),
#     )

#     df = df.rename(
#         columns={
#             "nameFullLocal_entity": "team_name",
#             "nameFullLocal_person": "player_name",
#             "abbreviation": "team_abbreviation"
#         }
#     )

#     # Extract home/away teams from fixture metadata
#     competitors = fixture.get("competitors", [])
#     home_team = next(c for c in competitors if c.get("isHome"))
#     away_team = next(c for c in competitors if not c.get("isHome"))

#     home_team_id = home_team["entityId"]
#     away_team_id = away_team["entityId"]

#     home_team_name = df_entities.loc[df_entities["entityId"] == home_team_id, "nameFullLocal"].iloc[0]
#     away_team_name = df_entities.loc[df_entities["entityId"] == away_team_id, "nameFullLocal"].iloc[0]

#     home_team_abbr = df_entities.loc[df_entities["entityId"] == home_team_id, "abbreviation"].iloc[0]
#     away_team_abbr = df_entities.loc[df_entities["entityId"] == away_team_id, "abbreviation"].iloc[0]

#     df["team_home_abbr"] = home_team_abbr
#     df["team_away_abbr"] = away_team_abbr



#     df['team_home_id'] = home_team_id
#     df['team_home_name'] = home_team_name
#     df['team_away_id'] = away_team_id
#     df['team_away_name'] = away_team_name
#     df['team_attacking_id'] = df['data.entityId']
#     df['team_attacking_name'] = df['team_name']
#     df['gameday'] = fixture_gameday

#     df['score_home'] = df.apply(
#         lambda row: row['raw_scores'].get(row['team_home_id']) if isinstance(row['raw_scores'], dict) else None,
#         axis=1
#     )
#     df['score_away'] = df.apply(
#         lambda row: row['raw_scores'].get(row['team_away_id']) if isinstance(row['raw_scores'], dict) else None,
#         axis=1
#     )

#     df['attacking_side'] = df['team_attacking_id'].apply(
#         lambda x: 'home' if x == home_team_id else 'away' if x == away_team_id else 'unknown'
#     )

#     # save to csv with naming scheme YYYY-MM-DD_gameday_GAMEDAY_id_FIXTURE-ID_ABBREVIATION-HOME-ABBREVIATION-AWAY.csv
#     fixture_date = fixture.get("startTimeLocal", {}).split("T")[0]
#     fixture_gameday = f"{int(fixture.get('roundNumber', 0)):02}"
#     fixture_id = fixture.get("fixtureId", {})

#     fixture_abbr = f"{home_team_abbr}-{away_team_abbr}"
#     fixture_abbr = fixture_abbr.replace("-", "_")

#     name_file = f"{fixture_date}_gameday_{fixture_gameday}_id_{fixture_id}_{fixture_abbr}.csv"
#     path = os.path.join("../data", name_file)
#     df.to_csv(path, index=False)
#     print(f"Fixture data saved to {path}")






#     # break

In [None]:
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_fixture(fixture):
    """Export the play-by-play events of one fixture to a CSV file.

    Fetches the event export and the fixture details from the API, flattens
    the events, adds team/player names, home/away information and per-event
    scores, and writes the result to "../data".

    Failures are reported with a message and do not propagate, so one broken
    fixture does not stop the export of the others.

    Args:
        fixture: One fixture record (a dict) from ``season_fixtures["data"]``.
    """
    id_fixture = fixture.get("fixtureId")
    if not id_fixture:
        return  # Skip if fixture ID is not found

    try:
        # `or` defaults also cover keys that are present but null.
        fixture_gameday = fixture.get("roundNumber") or 0
        fixture_date = (fixture.get("startTimeLocal") or "").split("T")[0]

        # Event timeline, requested with "withScores" so that an event can
        # carry a "scores" mapping.
        fixture_playbyplay_export = api.get_match_events_export(
            id_fixture,
            params={
                "include": "entities,organizations,fixtures,persons",
                "external": "entityId,personId",
                "withScores": "true",
            },
        )
        if not fixture_playbyplay_export:
            return

        fixture_timeline = fixture_playbyplay_export.get("data") or []
        if not fixture_timeline:
            print(f"No events returned for fixture {id_fixture}; skipping.")
            return

        # Fixture details; their "includes" hold the entities and persons.
        fixture_details = api.get_fixture(id_fixture, params={
            "include": "entities,organizations,persons",
            "external": "entityId,personId",
        })
        fixture_includes = fixture_details.get("includes", {})

        # Flatten each event: top-level keys as they are, "data" keys prefixed
        # with "data.", "data.options" keys prefixed with "data.options."; the
        # scores mapping is kept whole in "raw_scores".
        events = []
        for event in fixture_timeline:
            event_data = event.get("data") or {}
            event_options = event_data.get("options") or {}
            flat = {
                **{k: v for k, v in event.items() if k not in ("data", "scores")},
                **{f"data.{k}": v for k, v in event_data.items()},
                **{f"data.options.{k}": v for k, v in event_options.items()},
            }
            flat["raw_scores"] = event.get("scores", {})
            events.append(flat)

        df = pd.DataFrame(events)

        # Entities come from the fetched includes; the season-level fixture
        # record has no "resources" key of its own to read them from.
        entities = fixture_includes.get("resources", {}).get("entities", {})
        df_entities = pd.DataFrame.from_dict(entities, orient="index")
        df_entities["abbreviation"] = df_entities["codeLocal"]

        persons = fixture_includes.get("resources", {}).get("persons", {})
        df_persons = pd.DataFrame.from_dict(persons, orient="index")

        df = df.merge(
            df_entities[["entityId", "nameFullLocal", "abbreviation"]],
            left_on="data.entityId",
            right_on="entityId",
            how="left",
        )
        # Both sides now carry "nameFullLocal"; the suffixes tell them apart.
        df = df.merge(
            df_persons[["personId", "nameFullLocal"]],
            left_on="data.personId",
            right_on="personId",
            how="left",
            suffixes=("_entity", "_person"),
        )

        df = df.rename(columns={
            "nameFullLocal_entity": "team_name",
            "nameFullLocal_person": "player_name",
            "abbreviation": "team_abbreviation",
        })

        competitors = fixture.get("competitors", [])
        # A default for next() turns a missing competitor into a readable
        # error; a bare StopIteration would print an empty message below.
        home_team = next((c for c in competitors if c.get("isHome")), None)
        away_team = next((c for c in competitors if not c.get("isHome")), None)
        if home_team is None or away_team is None:
            raise ValueError("no home/away competitor pair in the fixture record")

        home_team_id = home_team["entityId"]
        away_team_id = away_team["entityId"]

        home_team_name = df_entities.loc[df_entities["entityId"] == home_team_id, "nameFullLocal"].iloc[0]
        away_team_name = df_entities.loc[df_entities["entityId"] == away_team_id, "nameFullLocal"].iloc[0]
        home_team_abbr = df_entities.loc[df_entities["entityId"] == home_team_id, "abbreviation"].iloc[0]
        away_team_abbr = df_entities.loc[df_entities["entityId"] == away_team_id, "abbreviation"].iloc[0]

        df["team_home_abbr"] = home_team_abbr
        df["team_away_abbr"] = away_team_abbr
        df["team_home_id"] = home_team_id
        df["team_home_name"] = home_team_name
        df["team_away_id"] = away_team_id
        df["team_away_name"] = away_team_name
        df["team_attacking_id"] = df["data.entityId"]
        df["team_attacking_name"] = df["team_name"]
        df["gameday"] = fixture_gameday

        # Per-event score of each side, looked up by team ID
        df["score_home"] = df["raw_scores"].apply(
            lambda scores: scores.get(home_team_id) if isinstance(scores, dict) else None
        )
        df["score_away"] = df["raw_scores"].apply(
            lambda scores: scores.get(away_team_id) if isinstance(scores, dict) else None
        )
        df["attacking_side"] = df["team_attacking_id"].apply(
            lambda x: "home" if x == home_team_id else "away" if x == away_team_id else "unknown"
        )

        # File name: YYYY-MM-DD_gameday_GAMEDAY_id_FIXTURE-ID_HOME_AWAY.csv
        # Hyphens (also those inside an abbreviation) become underscores.
        fixture_abbr = f"{home_team_abbr}-{away_team_abbr}".replace("-", "_")
        fixture_gameday_str = f"{int(fixture_gameday):02}"
        name_file = f"{fixture_date}_gameday_{fixture_gameday_str}_id_{id_fixture}_{fixture_abbr}.csv"

        # DataFrame.to_csv does not create missing directories.
        os.makedirs("../data", exist_ok=True)
        path = os.path.join("../data", name_file)
        df.to_csv(path, index=False)
        print(f"Saved: {path}")
    except Exception as e:
        # Deliberate best effort: report the fixture and carry on.
        print(f"Failed to process fixture {id_fixture}: {e}")


# Run in a normal loop ("data" is a list of fixture records)
for fixture in season_fixtures["data"]:
    process_fixture(fixture)

# Run in parallel (alternative to the loop above)
# with ThreadPoolExecutor(max_workers=12) as executor:
#     futures = [executor.submit(process_fixture, f) for f in season_fixtures["data"]]
#     for future in as_completed(futures):
#         future.result()