In [1]:
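# Format: league name / API league id | seasons (presumably the ones already fetched -- TODO confirm)
# GER-Bundesliga / 78 | 2015, 2016, 2018, 2019, 2020, 2021
# GER-Bundesliga2 / 79 | 2018, 2019, 2020, 2021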

In [2]:
from elo import update_elo

In [3]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import http.client
from time import sleep

In [4]:
# Read the API key from the local secrets file. The context manager closes the
# file handle deterministically instead of leaving it to the garbage collector.
with open("../../.secrets.json") as secrets_file:
    api_key = json.load(secrets_file)["soccerapi"]

In [5]:
def call_api(league, season, page):
    """Request one page of the ``/players`` endpoint and return the parsed JSON.

    Parameters
    ----------
    league
        League id, interpolated into the ``league`` query parameter.
    season
        Season, interpolated into the ``season`` query parameter.
    page
        Page number, interpolated into the ``page`` query parameter.

    Returns
    -------
    The response body decoded as UTF-8 and parsed with ``json.loads``.

    Notes
    -----
    Authenticates with the module-level ``api_key``. The HTTP status code is
    not inspected; whatever JSON the server sends back is returned as-is.
    """
    conn = http.client.HTTPSConnection("v3.football.api-sports.io")

    headers = {
        'x-rapidapi-host': "v3.football.api-sports.io",
        'x-rapidapi-key': api_key
        }

    try:
        conn.request("GET", f"/players?league={league}&season={season}&page={page}", headers=headers)
        res = conn.getresponse()
        data = res.read()
    finally:
        # A new connection is opened for every page, so release the socket
        # even when the request or the read fails.
        conn.close()

    return json.loads(data.decode("utf-8"))

def get_player_data(league, season, page=1, players_data=None):
    """Download every remaining result page for one league/season.

    Parameters
    ----------
    league
        League id forwarded to ``call_api``.
    season
        Season forwarded to ``call_api``.
    page
        First page to request (defaults to 1).
    players_data
        Optional list to append the page payloads to. When omitted, a fresh
        list is created for this call. (A mutable ``[]`` default would be
        shared between calls and would leak the pages of an earlier
        league/season into every later result.)

    Returns
    -------
    list
        One decoded API payload per fetched page, in request order. If
        ``players_data`` was supplied, that same list object is returned.

    Raises
    ------
    KeyError
        If a payload lacks the ``paging`` / ``current`` / ``total`` fields.
    """
    if players_data is None:
        players_data = []

    while True:
        players = call_api(league, season, page)
        players_data.append(players)

        paging = players["paging"]
        if paging["current"] >= paging["total"]:
            return players_data

        # Follow the page number reported by the API rather than our own counter.
        page = paging["current"] + 1
        if page % 9 == 0:
            # Pause for a minute before every ninth page -- presumably to stay
            # under the API's rate limit (TODO confirm the actual quota).
            sleep(60)


In [24]:
# Run parameters: `league` is the label written to the "League" column,
# `league_id` is the id sent to the API, `year` is the season.
league, league_id = "GER-Bundesliga", 78
year = 2017

# Fetch all result pages for this league/season.
player_data = get_player_data(league=league_id, season=year)

In [25]:
# Flatten the paged API payloads into parallel per-player column lists.
player_names, birthdates = [], []
club1, club2, club3 = [], [], []

for page_payload in player_data:
    for entry in page_payload["response"]:
        person = entry["player"]
        player_names.append(f'{person["firstname"]} {person["lastname"]}')
        birthdates.append(person["birth"]["date"])

        # Keep up to three team names, in the order of the statistics entries;
        # slots without an entry are filled with None.
        stats = entry["statistics"]
        club1.append(stats[0]["team"]["name"])
        club2.append(stats[1]["team"]["name"] if len(stats) > 1 else None)
        club3.append(stats[2]["team"]["name"] if len(stats) > 2 else None)

In [26]:
# Combine the column lists into one frame and tag every row with the season
# and league label of this run.
player_df = pd.DataFrame(
    {
        "Name": player_names,
        "Birthday": birthdates,
        "Club": club1,
        "Club2": club2,
        "Club3": club3,
    }
).assign(Year=year, League=league)

In [27]:
# Load the team-name replacement table. The context manager closes the file
# handle, which the bare open() call left dangling.
# NOTE(review): absolute, user-specific path -- consider making it configurable.
with open("/home/morten/soccerdata/config/teamname_replacements.json") as replacements_file:
    name_substitutes = json.load(replacements_file)

# Each key is the name to keep; its value lists the aliases to rewrite to it.
# DataFrame.replace matches whole cell values and is applied to every column.
for canonical, aliases in name_substitutes.items():
    for alias in aliases:
        player_df.replace(alias, canonical, inplace=True)

In [28]:
# Show the distinct values of the "Club" column after the replacement step.
player_df.loc[:, "Club"].unique()

array(['Schalke', 'Borussia M.Gladbach', 'Mainz', 'Borussia Dortmund',
       'Leverkusen', 'Stuttgart', 'Darmstadt', 'Bayern', 'FC Koln',
       'Hertha Berlin', 'Hannover', 'Ingolstadt', 'Augsburg', 'Hamburg',
       'Hoffenheim', 'Wolfsburg', 'Eintracht Frankfurt', 'Werder Bremen',
       'Freiburg', 'RBL'], dtype=object)

In [29]:
# Put the freshly fetched players in front of the rows already stored on disk
# and write the combined table back.
frames = [player_df]
try:
    frames.append(pd.read_csv("player_birthday_db.csv", sep=";"))
except FileNotFoundError:
    # First run: no database file yet, so start it from this season alone.
    pass

player_birthday_db = pd.concat(frames, ignore_index=True)
# Re-running this cell appends the same rows again; exact duplicates are
# removed by the de-duplication step that follows.
player_birthday_db.to_csv("player_birthday_db.csv", index=False, sep=";")

In [30]:
# Reload the stored database, drop exact duplicate rows and save it again.
# The shape is printed before and after to show how many rows were removed.
db_with_duplicates = pd.read_csv("player_birthday_db.csv", sep=";")
print(db_with_duplicates.shape)

player_birthday_db = db_with_duplicates.drop_duplicates()
print(player_birthday_db.shape)

player_birthday_db.to_csv("player_birthday_db.csv", sep=";", index=False)

(10526, 7)
(7837, 7)


In [37]:
# Team-name replacement check: reload the database and list the distinct
# "Club" values stored for the GER-Bundesliga2 rows.
player_birthday_db = pd.read_csv("player_birthday_db.csv", sep=";")
is_bundesliga2 = player_birthday_db["League"] == "GER-Bundesliga2"
player_birthday_db.loc[is_bundesliga2, "Club"].unique()

array(['Nuernberg', 'Karlsruher SC', 'FC Heidenheim', 'Arminia Bielefeld',
       'Erzgebirge Aue', 'Dynamo Dresden', 'Greuther Fuerth',
       'Wehen Wiesbaden', 'Stuttgart', 'Osnabrueck', 'St. Pauli',
       'Hannover', 'Jahn Regensburg', 'Bochum', 'Sandhausen', 'Hamburg',
       'Darmstadt', 'Holstein Kiel', 'Eintracht Braunschweig',
       'Wuerzburger Kickers', 'Fortuna Duesseldorf', 'Paderborn',
       'Kaiserslautern', 'FC Koln', 'Union Berlin', 'Ingolstadt',
       'Magdeburg', 'MSV Duisburg'], dtype=object)

In [36]:
# Load the team-name replacement table. The context manager closes the file
# handle, which the bare open() call left dangling.
# NOTE(review): absolute, user-specific path -- consider making it configurable.
with open("/home/morten/soccerdata/config/teamname_replacements.json") as replacements_file:
    name_substitutes = json.load(replacements_file)

# Each key is the name to keep; its value lists the aliases to rewrite to it.
# DataFrame.replace matches whole cell values and is applied to every column
# of the already-loaded `player_birthday_db` frame.
for canonical, aliases in name_substitutes.items():
    for alias in aliases:
        player_birthday_db.replace(alias, canonical, inplace=True)

# Persist the normalised names.
player_birthday_db.to_csv("player_birthday_db.csv", index=False, sep=";")