In [1]:
import httpx
import duckdb

# Root of the Saisonmanager v2 API; every endpoint below is built on top of it.
API_URL = "https://saisonmanager.de/api/v2"

# Endpoint templates. The doubled braces keep a literal `{league_id}` /
# `{game_id}` placeholder in the resulting string so it can be filled in
# later with `str.format(...)`.
leagues_url = f"{API_URL}/leagues.json"
league_url = f"{API_URL}/leagues/{{league_id}}.json"
standings_url = f"{API_URL}/leagues/{{league_id}}/table.json"
scorers_url = f"{API_URL}/leagues/{{league_id}}/scorer.json"
schedule_url = f"{API_URL}/leagues/{{league_id}}/schedule.json"
game_url = f"{API_URL}/games/{{game_id}}.json"

# Single DuckDB connection shared by every query cell in this notebook.
# ":memory:" is DuckDB's default target, spelled out here for clarity.
conn = duckdb.connect(database=":memory:")


In [2]:
# Build the SQL script first so that the statement text and its execution
# can be read (and debugged) separately.
#
# The script (re)creates the `leagues` table from the remote leagues JSON,
# keeping only 'Floorball Deutschland' leagues whose lower-cased name
# contains none of 'junior', 'kleinfeld', 'kf' or 'damen', ordered by season
# (newest first). The trailing SELECT is what comes back to Python.
leagues_query = f"""
CREATE OR REPLACE TABLE leagues AS
    SELECT id, game_operation, name, season::INT AS season
    FROM '{leagues_url}'
    WHERE
        game_operation = 'Floorball Deutschland'
        AND NOT contains(lower(name), 'junior')
        AND NOT contains(lower(name), 'kleinfeld')
        AND NOT contains(lower(name), 'kf')
        AND NOT contains(lower(name), 'damen')
    ORDER BY season::INT DESC;
SELECT * FROM leagues;
"""

# Run the script and materialise the final SELECT as a pandas DataFrame.
leagues_relation = conn.sql(leagues_query)
leagues = leagues_relation.df()
leagues

Unnamed: 0,id,game_operation,name,season
0,1442,Floorball Deutschland,1. FBL Herren,15
1,1513,Floorball Deutschland,1. FBL Herren - Playoffs,15
2,1514,Floorball Deutschland,1. FBL Herren - Playdowns,15
3,1444,Floorball Deutschland,2. FBL Herren Nord/West,15
4,1445,Floorball Deutschland,2. FBL Herren Süd/West,15
...,...,...,...,...
81,11,Floorball Deutschland,Regionalligameisterschaft Nord/West,6
82,197,Floorball Deutschland,1. FBL Herren,6
83,378,Floorball Deutschland,2. FBL Nord/West,6
84,66,Floorball Deutschland,2. FBL Süd/Ost,6


In [3]:
# One schedule endpoint per league selected above.
schedule_urls_all = [
    schedule_url.format(league_id=league_id) for league_id in leagues["id"]
]


def _schedule_is_reachable(client, url):
    """Return True when a HEAD request to ``url`` gets a 2xx response.

    Transport-level failures (timeouts, connection errors, ...) are treated
    the same as an error status: the URL "fails to load" and is reported as
    unreachable instead of aborting the whole cell.
    """
    try:
        return client.head(url).is_success
    except httpx.HTTPError:
        return False


# Filter out URLs that fail to load, usually with an Internal Server Error.
# A single Client is reused so that all probes share one connection pool
# instead of opening a fresh connection per URL.
with httpx.Client() as client:
    schedule_urls = [
        url for url in schedule_urls_all if _schedule_is_reachable(client, url)
    ]

In [7]:
# Render the schedule URLs as a comma-separated list of single-quoted SQL
# string literals, ready to be dropped into the read_json_auto([...]) call.
schedule_files_sql = ",".join(f"'{url}'" for url in schedule_urls)

# The script does three things in order:
#   1. (re)creates the `matches` table from all schedule files, flattening
#      the nested `result` struct and keeping only rows where both goal
#      counts are present,
#   2. exports that table to 'matches.parquet',
#   3. selects the table so it can be returned to Python.
matches_query = f"""
CREATE OR REPLACE TABLE matches AS
    SELECT
        game_id,
        game_number,
        date,
        hosting_club,
        arena AS arena_id,
        arena_name,
        arena_short,
        home_team_name,
        guest_team_name,
        result.home_goals AS home_goals,
        result.guest_goals AS guest_goals,
        result.forfait::BOOL as forfait,
        result.overtime::BOOL as overtime,
    FROM read_json_auto([{schedule_files_sql}])
    WHERE NOT (home_goals IS NULL OR guest_goals IS NULL);
COPY matches TO 'matches.parquet';
SELECT * FROM matches;
"""

# Execute the script and keep the final SELECT as a pandas DataFrame.
matches = conn.sql(matches_query).df()

In [9]:
# Read the exported Parquet file back in and display what is stored on disk.
matches_on_disk = conn.sql("SELECT * FROM 'matches.parquet'")
matches_on_disk.df()

Unnamed: 0,game_id,game_number,date,hosting_club,arena_id,arena_name,arena_short,home_team_name,guest_team_name,home_goals,guest_goals,forfait,overtime
0,32400,1,2023-09-09,Berlin Rockets,645,Werner-Ruhemann-Sporthalle,"Berlin, Werner-Ruhemann-Sporthalle",Berlin Rockets,VfL Red Hocks Kaufering,7,2,False,False
1,32401,2,2023-09-09,Floorball-Club München,434,Sporthalle Berufsschule ALS 3,"München, Sporthalle Berufsschule ALS 3",Floorball-Club München,MFBC Leipzig,3,6,False,False
2,32402,3,2023-09-09,Floor Fighters Chemnitz,503,Schlossteichhalle,"Chemnitz, Schlossteichhalle",Floor Fighters Chemnitz,SSF Dragons Bonn,9,7,False,False
3,32403,4,2023-09-09,TV Schriesheim,151,Mehrzweckhalle,"Schriesheim, Mehrzweckhalle",TV Schriesheim,Red Devils Wernigerode,9,7,False,False
4,32404,5,2023-09-09,UHC Weißenfels,38,Stadthalle,"Weißenfels, Stadthalle",UHC Sparkasse Weißenfels,ETV Piranhhas Hamburg,4,3,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2623,9334,502,2016-01-16,Red Devils Wernigerode,475,Stadtfeldhalle,"Wernigerode, Stadtfeldhalle",Red Devils Wernigerode,BAT Berlin,7,2,False,False
2624,996,503,2016-03-20,TV Eiche Horn Bremen,100,Sportzentrum Eiche Horn,"Bremen, Sportzentrum Eiche Horn",TV Eiche Horn Bremen,Red Hocks Kaufering,3,6,False,False
2625,943,601,2016-05-14,UHC Döbeln 06,305,Stadtsporthalle,"Döbeln, Stadtsporthalle",UHC Weißenfels,UHC Döbeln 06,12,3,False,False
2626,7328,602,2016-05-14,UHC Döbeln 06,305,Stadtsporthalle,"Döbeln, Stadtsporthalle",Red Hocks Kaufering,Red Devils Wernigerode,5,3,False,False
