In [1]:
import httpx
import duckdb

# Root of the Saisonmanager v2 JSON API; every endpoint below hangs off it.
API_URL = "https://saisonmanager.de/api/v2"

# Endpoint templates. The doubled braces keep `{league_id}` / `{game_id}` as
# literal placeholders in the resulting strings, to be filled in later with
# `str.format(...)`.
leagues_url = f"{API_URL}/leagues.json"
league_url = f"{API_URL}/leagues/{{league_id}}.json"
standings_url = f"{API_URL}/leagues/{{league_id}}/table.json"
scorers_url = f"{API_URL}/leagues/{{league_id}}/scorer.json"
schedule_url = f"{API_URL}/leagues/{{league_id}}/schedule.json"
game_url = f"{API_URL}/games/{{game_id}}.json"

# Single DuckDB connection shared by every query cell in this notebook.
# ":memory:" is DuckDB's default database, spelled out here for clarity.
conn = duckdb.connect(database=":memory:")


In [2]:
# (Re)build the `leagues` table straight from the leagues endpoint.
# Only rows with game_operation 'Floorball Deutschland' are kept, and any
# league whose lower-cased name contains 'junior', 'kleinfeld', 'kf' or
# 'damen' is dropped. `season` is cast to INT and rows are ordered by it,
# newest first. The trailing SELECT is what gets returned as a DataFrame.
leagues_query = f"""
CREATE OR REPLACE TABLE leagues AS
    SELECT id, game_operation, name, season::INT AS season
    FROM '{leagues_url}'
    WHERE
        game_operation = 'Floorball Deutschland'
        AND NOT contains(lower(name), 'junior')
        AND NOT contains(lower(name), 'kleinfeld')
        AND NOT contains(lower(name), 'kf')
        AND NOT contains(lower(name), 'damen')
    ORDER BY season::INT DESC;
SELECT * FROM leagues;
"""
leagues = conn.sql(leagues_query).df()
leagues

Unnamed: 0,id,game_operation,name,season
0,1442,Floorball Deutschland,1. FBL Herren,15
1,1513,Floorball Deutschland,1. FBL Herren - Playoffs,15
2,1514,Floorball Deutschland,1. FBL Herren - Playdowns,15
3,1444,Floorball Deutschland,2. FBL Herren Nord/West,15
4,1445,Floorball Deutschland,2. FBL Herren Süd/West,15
...,...,...,...,...
81,11,Floorball Deutschland,Regionalligameisterschaft Nord/West,6
82,197,Floorball Deutschland,1. FBL Herren,6
83,378,Floorball Deutschland,2. FBL Nord/West,6
84,66,Floorball Deutschland,2. FBL Süd/Ost,6


In [3]:
# One schedule URL per league row found above.
candidate_urls = [schedule_url.format(league_id=league_id) for league_id in leagues.id]


def _schedule_is_reachable(client: httpx.Client, url: str) -> bool:
    """Return True when a HEAD request to ``url`` yields a 2xx response.

    Transport problems (timeouts, connection errors, ...) are reported as
    ``False`` instead of propagating, so one bad endpoint cannot abort the
    whole filtering pass.
    """
    try:
        return client.head(url).is_success
    except httpx.HTTPError:
        return False


# Filter out URLs that fail to load, usually with an Internal Server Error.
# A single shared client reuses the connection instead of opening a new one
# for every URL.
with httpx.Client() as client:
    schedule_urls = [url for url in candidate_urls if _schedule_is_reachable(client, url)]

In [7]:
# Render the reachable schedule URLs as comma-separated, single-quoted items
# that go inside the SQL list literal passed to read_json_auto.
schedule_url_literals = ",".join(f"'{url}'" for url in schedule_urls)

# (Re)build the `matches` table from all schedule files at once:
#   * `arena` is exposed as `arena_id`,
#   * the goal counts and the forfait/overtime flags are pulled out of `result`
#     (the flags are cast to BOOL),
#   * rows with a missing home or guest goal count are dropped.
# The table is then exported to 'matches.parquet' and returned as a DataFrame.
matches_query = f"""
CREATE OR REPLACE TABLE matches AS
    SELECT
        game_id,
        game_number,
        date,
        hosting_club,
        arena AS arena_id,
        arena_name,
        arena_short,
        home_team_name,
        guest_team_name,
        result.home_goals AS home_goals,
        result.guest_goals AS guest_goals,
        result.forfait::BOOL as forfait,
        result.overtime::BOOL as overtime,
    FROM read_json_auto([{schedule_url_literals}])
    WHERE NOT (home_goals IS NULL OR guest_goals IS NULL);
COPY matches TO 'matches.parquet';
SELECT * FROM matches;
"""
matches = conn.sql(matches_query).df()

In [6]:
# Read the exported Parquet file back through DuckDB and display it, as a
# quick check of what was written to disk.
matches_parquet = conn.sql("SELECT * FROM 'matches.parquet'")
matches_parquet

┌─────────┬─────────────┬────────────┬──────────────────────┬───┬────────────┬─────────────┬─────────┬──────────┐
│ game_id │ game_number │    date    │     hosting_club     │ … │ home_goals │ guest_goals │ forfait │ overtime │
│  int64  │    int64    │    date    │       varchar        │   │   int64    │    int64    │ boolean │ boolean  │
├─────────┼─────────────┼────────────┼──────────────────────┼───┼────────────┼─────────────┼─────────┼──────────┤
│   32400 │           1 │ 2023-09-09 │ Berlin Rockets       │ … │          7 │           2 │ false   │ false    │
│   32401 │           2 │ 2023-09-09 │ Floorball-Club Mün…  │ … │          3 │           6 │ false   │ false    │
│   32402 │           3 │ 2023-09-09 │ Floor Fighters Che…  │ … │          9 │           7 │ false   │ false    │
│   32403 │           4 │ 2023-09-09 │ TV Schriesheim       │ … │          9 │           7 │ false   │ false    │
│   32404 │           5 │ 2023-09-09 │ UHC Weißenfels       │ … │          4 │          