In [10]:
from api import openligadb
import datetime

def extract_result(match_results):
    """Pull the final score out of a match's list of result entries.

    Scans ``match_results`` in order for the first entry whose
    ``'resultName'`` is ``'Endergebnis'`` (German for "final result").

    Returns:
        ``("finished", pointsTeam1, pointsTeam2)`` taken from that entry, or
        ``("future", None, None)`` when no such entry exists.
    """
    final_entry = next(
        (entry for entry in match_results if entry['resultName'] == 'Endergebnis'),
        None,
    )
    if final_entry is None:
        return "future", None, None
    return "finished", final_entry['pointsTeam1'], final_entry['pointsTeam2']



def extract_winner(match_data):
    """Return the id of the team that scored more goals, if there is one.

    Compares ``match_data["goalsHome"]`` with ``match_data["goalsAway"]`` and
    returns ``match_data["teamHomeId"]`` or ``match_data["teamAwayId"]``
    accordingly.

    Returns ``None`` for a draw, and also when the comparison raises
    ``TypeError`` (e.g. the goal counts are ``None`` because the match has no
    final result yet).
    """
    try:
        home_goals, away_goals = match_data["goalsHome"], match_data["goalsAway"]
        if home_goals > away_goals:
            winner_key = "teamHomeId"
        elif home_goals < away_goals:
            winner_key = "teamAwayId"
        else:
            # Equal scores: nobody won.
            return None
        return match_data[winner_key]
    except TypeError:
        return None


def extract_match_data(json):
    """Flatten one raw match record into a flat dict of the fields used here.

    Args:
        json: Mapping for a single match. It is read for the keys
            ``matchID``, ``matchDateTime`` (parsed with the format
            ``%Y-%m-%dT%H:%M:%S``), ``team1``/``team2`` (each with ``teamId``
            and ``teamName``), ``matchResults``, ``group`` (with
            ``groupOrderID``) and ``leagueSeason``.

    Returns:
        dict with the keys, in this order: ``id``, ``date``, ``teamHomeId``,
        ``teamHomeName``, ``teamAwayId``, ``teamAwayName``, ``status``,
        ``goalsHome``, ``goalsAway``, ``winnerTeamId``, ``matchDay``,
        ``season``.
    """
    match_data = {
        "id": json["matchID"],
        "date": datetime.datetime.strptime(json["matchDateTime"], "%Y-%m-%dT%H:%M:%S"),
        "teamHomeId": json["team1"]["teamId"],
        "teamHomeName": json["team1"]["teamName"],
        "teamAwayId": json["team2"]["teamId"],
        "teamAwayName": json["team2"]["teamName"],
    }

    status, goals_home, goals_away = extract_result(json["matchResults"])
    match_data.update(status=status, goalsHome=goals_home, goalsAway=goals_away)

    # The winner is derived from the goal counts stored just above.
    match_data["winnerTeamId"] = extract_winner(match_data)

    match_data.update(
        matchDay=json["group"]["groupOrderID"],
        season=json["leagueSeason"],
    )
    return match_data

In [11]:
from tqdm import tqdm
import pickle
import pandas as pd

# Load the cached match table if one exists; otherwise rebuild it from the
# OpenLigaDB API (league shortcut "bl1", seasons 2005 through 2023) and cache it.
try:
    # NOTE: unpickling can execute arbitrary code -- only load a cache file
    # that this notebook wrote itself.
    with open("match_df.pck", "rb") as cache_file:
        match_df = pickle.load(cache_file)
except Exception:
    # Any load failure (missing, truncated or incompatible cache) triggers a
    # rebuild. `Exception` instead of a bare `except` so that interrupting the
    # kernel is not swallowed and does not silently start a full re-download.
    match_data = []
    for year in tqdm(range(2005, 2024)):
        json = openligadb.get_all_season_matches("bl1", year)
        for match_data_json in json:
            match_data.append(extract_match_data(match_data_json))
    match_df = pd.DataFrame(match_data)
    # Context manager so the cache file is flushed and closed deterministically.
    with open("match_df.pck", "wb") as cache_file:
        pickle.dump(match_df, cache_file)

In [12]:
import numpy as np
from dateutil.relativedelta import relativedelta
import datetime

# Debug switch: when True, every match finished within the last 14 days is
# reset to look like a not-yet-played ("future") match.
testing = False

if testing:
    testing_date = datetime.datetime.today() + relativedelta(days=-14)
    # Compute the row mask once so the selection and the write-back below are
    # guaranteed to address the same rows.
    recently_finished = (match_df["status"] == "finished") & (match_df["date"] > testing_date)
    # Explicit copy of the selected rows, so the column writes below never
    # touch a view of match_df.
    manip_df = match_df.loc[recently_finished].copy(deep=True)
    manip_df["status"] = "future"
    # `np.NaN` was removed in NumPy 2.0; `np.nan` works on all versions.
    manip_df[["goalsHome", "goalsAway", "winnerTeamId"]] = np.nan
    match_df[recently_finished] = manip_df

In [13]:
# Re-fetch matches that are still marked "future" although their date has
# passed, then persist the refreshed table.
updatable_df = match_df[(match_df["status"] == "future") & (match_df["date"] < datetime.datetime.today())]
for match_index, row in tqdm(updatable_df.iterrows()):
    # NOTE(review): the value returned by get_match_data is written into the
    # row as-is. If it is the raw API record (like the items returned by
    # get_all_season_matches), it probably needs extract_match_data() first --
    # confirm against api.openligadb.
    row_match_data = openligadb.get_match_data(row["id"])
    match_df.loc[match_index] = row_match_data
if len(updatable_df) > 0:
    # Context manager so the cache file is flushed and closed deterministically.
    with open("match_df.pck", "wb") as cache_file:
        pickle.dump(match_df, cache_file)

0it [00:00, ?it/s]


In [14]:
# Keep only completed matches and replace every remaining missing value with 0
# (e.g. the winnerTeamId of a draw, which get_team_match_df checks for).
finished_mask = match_df["status"] == "finished"
match_df = match_df.loc[finished_mask].fillna(0)

In [15]:
# Lookup table: team id -> DataFrame of that team's matches (starts empty).
team_match_df_dict = {}

In [16]:
def get_team_match_df(teamId):
    """Build the table of all matches in ``match_df`` that involve one team.

    Reads the module-level ``match_df`` and returns a deep copy of the rows in
    which ``teamId`` is the home or the away side, extended with three columns
    seen from that team's perspective:

    * ``goalsTeam`` -- goals scored by the team,
    * ``goalsOpponent`` -- goals scored by the other side,
    * ``teamPoints`` -- 3 if ``winnerTeamId`` equals ``teamId``, 1 if
      ``winnerTeamId`` is 0 (how a match without a winner appears once missing
      values have been filled with 0), otherwise 0.
    """
    plays_home = match_df["teamHomeId"] == teamId
    plays_away = match_df["teamAwayId"] == teamId
    team_match_df = match_df[plays_home | plays_away].copy(deep=True)

    def points_for(winner_id):
        # Win first, then draw marker, else loss.
        if winner_id == teamId:
            return 3
        return 1 if winner_id == 0 else 0

    scored, conceded, points = [], [], []
    for _, match in team_match_df.iterrows():
        # Pick which goal column belongs to the team for this match.
        if match["teamHomeId"] == teamId:
            own_key, other_key = "goalsHome", "goalsAway"
        else:
            own_key, other_key = "goalsAway", "goalsHome"
        scored.append(match[own_key])
        conceded.append(match[other_key])
        points.append(points_for(match["winnerTeamId"]))

    team_match_df["goalsTeam"] = scored
    team_match_df["goalsOpponent"] = conceded
    team_match_df["teamPoints"] = points
    return team_match_df

In [17]:
# Build one per-team match table for every team id that occurs as a home team.
home_team_ids = set(match_df["teamHomeId"])
for teamId in tqdm(home_team_ids):
    team_match_df_dict[teamId] = get_team_match_df(teamId)

100%|██████████| 37/37 [00:00<00:00, 183.68it/s]


In [19]:
from api.transfermarkt import get_teams_market_values_threaded2
from collections import defaultdict

# Load cached market values if available; otherwise fetch them for every
# (home team name, season) pair found in match_df and cache the result.
try:
    # NOTE: unpickling can execute arbitrary code -- only load a cache file
    # that this notebook wrote itself.
    with open("market_values_dict.pck", "rb") as cache_file:
        market_value_dict = pickle.load(cache_file)
except Exception:
    # Any load failure triggers a rebuild. `Exception` instead of a bare
    # `except` so that interrupting the kernel is not swallowed and does not
    # silently start a full re-fetch.
    team_on_season_df = match_df.groupby(["teamHomeName", "season"]).size().reset_index(name='Freq')
    market_value_list = get_teams_market_values_threaded2(team_on_season_df)
    market_value_dict = defaultdict(lambda: defaultdict(float))
    # Each entry is indexed as [0][1] -> [2]; presumably
    # (team name, season, market value) -- TODO confirm against api.transfermarkt.
    for market_value in market_value_list:
        market_value_dict[market_value[0]][market_value[1]] = market_value[2]
    # Drop the outer defaultdict: its lambda factory cannot be pickled.
    market_value_dict = dict(market_value_dict)
    # Context manager so the cache file is flushed and closed deterministically.
    with open("market_values_dict.pck", "wb") as cache_file:
        pickle.dump(market_value_dict, cache_file)

[('1. FC Heidenheim 1846', 2023), ('1. FC Kaiserslautern', 2005), ('1. FC Kaiserslautern', 2010), ('1. FC Kaiserslautern', 2011), ('1. FC Köln', 2005)]
