In [1]:
import requests
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize

In [2]:
def get_data(league, season):
    #retrieve data from the Swiss TXT API
    base_football_url = "https://test.sport.api.swisstxt.ch/v1/contests/football/"
    #define league
    liga = league + "/"
    #define season
    season = str(season)
    #define language
    lang = "?lang=DE"
   
    #get season
    season_info_raw = requests.get(base_football_url + liga+lang).json()
    data_season = season_info_raw['phases']
    
    # Normalize the JSON data
    normalized_data = json_normalize(data_season)
    
    # Convert the normalized data to a Pandas DataFrame
    df_season_raw = pd.DataFrame(normalized_data)
    
    #get the season id (for now singular)
    df_season_raw['season_id'] = df_season_raw['cesimReference.contestId'].astype(str) + "-" + df_season_raw['cesimReference.masterEventId'].astype(str)
    season_ids = list(set(df_season_raw['season_id']))
    season_id = season_ids[0]
    
    #get all the games for this season
    base_url_eventitems = "https://test.sport.api.swisstxt.ch/v1/eventItems?phaseIds="
    season_id = season_ids[0]
    start_date = "2010-01-01" #we don't care as long as it includes our chosen season above
    end_date = "2030-01-01"
    games_info_raw = requests.get(base_url_eventitems + season_id +"&fromDate="+start_date +"&toDate="+end_date+lang).json()
    finished_rounds = list(df_season_raw[df_season_raw['state']=="Finished"]['displayName'])
    
    #extract the nested info about the phase, as the normalization below doesn't get this one
    container = []
    
    for i in range(0,len(games_info_raw)):
        try:
            game = {'id': games_info_raw[i]['id'],
                    'round_id': games_info_raw[i]['contestInfo']['phases'][1]['id']}
            container.append(game)
        except:
            game = {'id': games_info_raw[i]['id'],
                    'round_id': games_info_raw[i]['contestInfo']['phases'][0]['id']}
            container.append(game)
    
    phase_info = pd.DataFrame(container)
    
    # Normalize the JSON data
    normalized_data = json_normalize(games_info_raw)
    
    # Convert the normalized data to a Pandas DataFrame
    df_games_raw = pd.DataFrame(normalized_data)
    
    #drop all the columns in Italian and French
    cols_to_drop = [col for col in df_games_raw.columns if col.endswith('.it') or col.endswith('.fr')]
    df_games_raw.drop(cols_to_drop, axis=1, inplace=True)
    pd.set_option('display.max_columns', None)
    
    df_games_raw = pd.merge(df_games_raw, phase_info, on='id')
    
    df_games_raw.head()

    #get a list of all the teams of this season
    all_teams = list(set(df_games_raw["competitor1.name.de"]))
    
    # Create a list of dictionaries for each finished game with the round ID, competitor names, and scores
    games = []
    for i in range(len(df_games_raw)):
        if df_games_raw.loc[i, 'state'] == 'Finished':
            game = {}
            game['round_id'] = df_games_raw.loc[i, 'round_id']
            game['team1'] = df_games_raw.loc[i, 'competitor1.name.de']
            game['team2'] = df_games_raw.loc[i, 'competitor2.name.de']
            game['team1_score'] = df_games_raw.loc[i, 'competitor1.results.main']
            game['team2_score'] = df_games_raw.loc[i, 'competitor2.results.main']
            games.append(game)
    
    # Create a list of dictionaries for each team with the team name, round ID, points, goals scored, and goals scored against
    teams = []
    for team in all_teams:
        for i, round_id in enumerate(df_games_raw['round_id'].unique(), start=1):
            team_data = {'team': team, 'round_id': round_id, 'points': 0, 'goals_scored': 0, 'goals_conceded': 0, 'round_num': i}
            teams.append(team_data)
    
    # Calculate the points and other stats for each team in each round based on their scores in the games
    for game in games:
        team1_goals = game['team1_score']
        team2_goals = game['team2_score']
        team1_points = 1
        team2_points = 1
        if team1_goals > team2_goals:
            team1_points = 3
            team2_points = 0
        elif team1_goals < team2_goals:
            team1_points = 0
            team2_points = 3
        for team in teams:
            if team['team'] == game['team1'] and team['round_id'] == game['round_id']:
                team['points'] += team1_points
                team['goals_scored'] += team1_goals
                team['goals_conceded'] += team2_goals
            elif team['team'] == game['team2'] and team['round_id'] == game['round_id']:
                team['points'] += team2_points
                team['goals_scored'] += team2_goals
                team['goals_conceded'] += team1_goals
    
    # Create a new DataFrame from the list of team dictionaries
    df_stats = pd.DataFrame(teams)
    
    # Add up points for each team one round after another
    df_stats.sort_values(['team', 'round_num'], inplace=True)
    df_stats['total_points'] = df_stats.groupby('team')['points'].cumsum()
    
    # Calculate average points scored for last 5 rounds
    df_stats['avg_points_last_5'] = df_stats.groupby('team')['points'].rolling(5, min_periods=1).mean().reset_index(0, drop=True)
    
    # Calculate average points scored for last 10 rounds
    df_stats['avg_points_last_10'] = df_stats.groupby('team')['points'].rolling(10, min_periods=1).mean().reset_index(0, drop=True)
    
    # Calculate average goals scored for last 5 rounds
    df_stats['avg_goals_scored_last_5'] = df_stats.groupby('team')['goals_scored'].rolling(5, min_periods=1).mean().reset_index(0, drop=True)
    
    # Calculate average goals scored for last 10 rounds
    df_stats['avg_goals_scored_last_10'] = df_stats.groupby('team')['goals_scored'].rolling(10, min_periods=1).mean().reset_index(0, drop=True)
    
    # Calculate average goals conceded for last 5 rounds
    df_stats['avg_goals_conceded_last_5'] = df_stats.groupby('team')['goals_conceded'].rolling(5, min_periods=1).mean().reset_index(0, drop=True)
    
    # Calculate average goals conceded for last 10 rounds
    df_stats['avg_goals_conceded_last_10'] = df_stats.groupby('team')['goals_conceded'].rolling(10, min_periods=1).mean().reset_index(0, drop=True)
    
    # Sort by round_num
    df_stats.sort_values(['team','round_num'], inplace=True)
    df_stats = df_stats.reset_index()

    df_season_small = df_season_raw[["id", "displayName"]]
    df_season_small.columns = ["round_id", "displayName"]
    df_stats = pd.merge(df_stats, df_season_small)
    
    # Number of rounds left
    df_stats['num_rounds_left'] = max(df_stats['round_num'])-df_stats['round_num']
    
    #get the projected averages
    proj_points_avg_last_5 = df_stats['total_points'] + (df_stats['num_rounds_left'] * df_stats['avg_points_last_5'])
    df_stats['proj_points_avg_last_5'] = proj_points_avg_last_5
    proj_points_avg_last_10 = df_stats['total_points'] + (df_stats['num_rounds_left'] * df_stats['avg_points_last_10'])
    df_stats['proj_points_avg_last_10'] = proj_points_avg_last_10
    
    # Calculate goal difference for each game
    df_stats['goal_diff_game'] = df_stats['goals_scored'] - df_stats['goals_conceded']
    
    # Group by round and sum goal differences
    df_stats['goal_diff'] = df_stats.groupby('team')['goal_diff_game'].cumsum()
    
    #Rank now for each round
    df_stats['rank'] = df_stats.sort_values(by=['total_points', 'goal_diff'],
                   ascending=[False, False]).groupby('round_num').cumcount() + 1
    df_stats['rank_proj5'] = df_stats.sort_values(by=['proj_points_avg_last_5', 'goal_diff'],
                   ascending=[False, False]).groupby('round_num').cumcount() + 1
    df_stats['rank_proj10'] = df_stats.sort_values(by=['proj_points_avg_last_10', 'goal_diff'],
                   ascending=[False, False]).groupby('round_num').cumcount() + 1
    
    df_stats[df_stats['displayName'].isin(finished_rounds)].to_csv('plots/'+league+'.csv', index=False)
    print(league + " is done! Züri allez!")

In [3]:
leagues = ["super-league","bundesliga"]

In [4]:
for league in leagues:
    get_data(league,2022)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)