In [42]:
import pandas as pd
import requests

In [43]:
# Seasons to download; each value is passed as the season segment of the API URL.
SEASONS = range(2010, 2024)
# Seconds to wait for the server before giving up. Without a timeout,
# requests can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# All matches of all requested seasons, concatenated into one flat list.
bl1_matches = []

for year in SEASONS:
    response = requests.get(
        f"https://api.openligadb.de/getmatchdata/bl1/{year}",
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error body as data.
    response.raise_for_status()
    years_matches = response.json()
    bl1_matches += years_matches
print(f"Scraped {len(bl1_matches)} bl1 matches")

# League table per season, keyed by the season year as a string (e.g. '2018').
bl1_tables = {}
for year in SEASONS:
    response = requests.get(
        f"https://api.openligadb.de/getbltable/bl1/{year}",
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    bl1_tables[str(year)] = response.json()

Scraped 4284 bl1 matches


In [44]:
# Show which fields one table entry carries, then list the statistic
# fields to keep (identifying fields such as teamName are left out).
print(bl1_tables['2018'][0].keys())
columns = [
    'points',
    'opponentGoals',
    'goals',
    'matches',
    'won',
    'lost',
    'draw',
    'goalDiff',
]
print(columns)

dict_keys(['teamInfoId', 'teamName', 'shortName', 'teamIconUrl', 'points', 'opponentGoals', 'goals', 'matches', 'won', 'lost', 'draw', 'goalDiff'])
['points', 'opponentGoals', 'goals', 'matches', 'won', 'lost', 'draw', 'goalDiff']


In [49]:
# Flatten the per-season tables into long format:
# one [feature, year, team id, value] row per selected column of every table entry.
team_rows = [
    [col, year, team['teamInfoId'], team[col]]
    for year, teams in bl1_tables.items()
    for team in teams
    for col in columns
]

df_teams_all = pd.DataFrame(
    team_rows,
    columns=['feature', 'year', 'team', 'value'],
)
print(f"Scraped {len(df_teams_all['team'].unique())} teams")

df_teams_all.head()

Scraped 32 teams


Unnamed: 0,feature,year,team,value
0,points,2010,7,75
1,opponentGoals,2010,7,22
2,goals,2010,7,67
3,matches,2010,7,34
4,won,2010,7,23


In [50]:
# Reshape long -> wide: one row per (team, year), one column per feature.
# Values are summed per (team, year, feature) first; pivot_table then applies
# its default aggregation to the already-unique cells.
df_teams = (
    df_teams_all
    .groupby(["team", "year", "feature"])["value"]
    .sum()
    .reset_index()
    .pivot_table(values="value", index=["team", "year"], columns="feature")
)
df_teams.to_csv("teams.csv")
df_teams

Unnamed: 0_level_0,feature,draw,goalDiff,goals,lost,matches,opponentGoals,points,won
team,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6,2010,8.0,20.0,64.0,6.0,34.0,44.0,68.0,20.0
6,2011,9.0,8.0,52.0,10.0,34.0,44.0,54.0,15.0
6,2012,8.0,26.0,65.0,7.0,34.0,39.0,65.0,19.0
6,2013,4.0,19.0,60.0,11.0,34.0,41.0,61.0,19.0
6,2014,10.0,25.0,62.0,7.0,34.0,37.0,61.0,17.0
...,...,...,...,...,...,...,...,...,...
1635,2019,12.0,44.0,81.0,4.0,34.0,37.0,66.0,18.0
1635,2020,8.0,28.0,60.0,7.0,34.0,32.0,65.0,19.0
1635,2021,7.0,35.0,72.0,10.0,34.0,37.0,58.0,17.0
1635,2022,6.0,23.0,64.0,8.0,34.0,41.0,66.0,20.0


In [47]:
# Display the first raw match record to see which fields the API returns.
bl1_matches[0]

{'matchID': 9998,
 'matchDateTime': '2010-08-20T20:30:00',
 'timeZoneID': 'W. Europe Standard Time',
 'leagueId': 159,
 'leagueName': '1. Fussball-Bundesliga 2010/2011',
 'leagueSeason': 2010,
 'leagueShortcut': 'bl1',
 'matchDateTimeUTC': '2010-08-20T18:30:00Z',
 'group': {'groupName': '1. Spieltag', 'groupOrderID': 1, 'groupID': 12175},
 'team1': {'teamId': 40,
  'teamName': 'FC Bayern München',
  'shortName': 'Bayern',
  'teamIconUrl': 'https://i.imgur.com/jJEsJrj.png',
  'teamGroupName': None},
 'team2': {'teamId': 131,
  'teamName': 'VfL Wolfsburg',
  'shortName': 'Wolfsburg',
  'teamIconUrl': 'https://i.imgur.com/ucqKV4B.png',
  'teamGroupName': None},
 'lastUpdateDateTime': '2010-08-21T01:01:01.457',
 'matchIsFinished': True,
 'matchResults': [{'resultID': 14227,
   'resultName': 'Endergebnis',
   'pointsTeam1': 2,
   'pointsTeam2': 1,
   'resultOrderID': 1,
   'resultTypeID': 2,
   'resultDescription': 'Ergebnis nach Ende der offiziellen Spielzeit'},
  {'resultID': 14228,
   'r

In [52]:
def get_end_result(row):
    """Pick the final-score entry out of a match's result list.

    Args:
        row: A raw match dict containing a 'matchResults' list of result dicts.

    Returns:
        The first result whose 'resultName' is "Endergebnis"; when no entry
        carries that name, the last entry of the list.

    Raises:
        IndexError: If 'matchResults' is empty.
    """
    candidates = row['matchResults']
    final = next(
        (entry for entry in candidates if entry["resultName"] == "Endergebnis"),
        None,
    )
    if final is None:
        # No explicitly named final result: fall back to the last listed one.
        return candidates[-1]
    return final

def match_to_record(row):
    """Flatten one raw match dict into a flat record suitable for a DataFrame row.

    Args:
        row: A raw match dict with 'matchID', 'matchDateTimeUTC', 'group',
            'leagueId', 'leagueName', 'leagueSeason', 'team1', 'team2' and
            'matchResults' entries.

    Returns:
        A dict with match, league and team identifiers plus the goals of both
        teams, taken from the result chosen by get_end_result.
    """
    result = get_end_result(row)
    group = row['group']
    home = row['team1']
    away = row['team2']

    # Insertion order matters: it becomes the column order of the resulting frame.
    return dict(
        id=row['matchID'],
        date=row['matchDateTimeUTC'],
        matchDay=group['groupName'],
        matchDayID=group['groupOrderID'],
        leagueID=row['leagueId'],
        leagueName=row['leagueName'],
        leagueSeason=row['leagueSeason'],
        team1ID=home['teamId'],
        team1Name=home['teamName'],
        team1Short=home['shortName'],
        team2ID=away['teamId'],
        team2Name=away['teamName'],
        team2Short=away['shortName'],
        team1Goals=result['pointsTeam1'],
        team2Goals=result['pointsTeam2'],
        result=result['resultName'],
    )

# Flatten every scraped match, index the table by match id, and persist it.
records = list(map(match_to_record, bl1_matches))
df = pd.DataFrame.from_records(records).set_index("id")
df.to_csv("matches.csv")
df.tail()

Unnamed: 0_level_0,date,matchDay,matchDayID,leagueID,leagueName,leagueSeason,team1ID,team1Name,team1Short,team2ID,team2Name,team2Short,team1Goals,team2Goals,result
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
66932,2024-05-18T13:30:00Z,34. Spieltag,34,4608,1. Fußball-Bundesliga 2023/2024,2023,131,VfL Wolfsburg,Wolfsburg,81,1. FSV Mainz 05,Mainz,1,3,Endergebnis
66933,2024-05-18T13:30:00Z,34. Spieltag,34,4608,1. Fußball-Bundesliga 2023/2024,2023,175,TSG 1899 Hoffenheim,Hoffenheim,40,FC Bayern München,Bayern,4,2,Endergebnis
66934,2024-05-18T13:30:00Z,34. Spieltag,34,4608,1. Fußball-Bundesliga 2023/2024,2023,134,Werder Bremen,Bremen,129,VfL Bochum,Bochum,4,1,Endergebnis
66935,2024-05-18T13:30:00Z,34. Spieltag,34,4608,1. Fußball-Bundesliga 2023/2024,2023,16,VfB Stuttgart,Stuttgart,87,Borussia Mönchengladbach,Gladbach,4,0,Endergebnis
66936,2024-05-18T13:30:00Z,34. Spieltag,34,4608,1. Fußball-Bundesliga 2023/2024,2023,199,1. FC Heidenheim 1846,Heidenheim,65,1. FC Köln,Köln,4,1,Endergebnis
