In [None]:
from player import Player, Game

In [None]:
import socceraction.spadl as spadl
import socceraction.xthreat as xthreat
import soccerdata as sd
from pathlib import PosixPath
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

import socceraction.spadl.config as spadlconfig
from socceraction.spadl.schema import SPADLSchema
from pandera.typing import DataFrame, Series
import numpy.typing as npt
from sklearn.exceptions import NotFittedError
from typing import Callable, List, Optional, Tuple
import os
import copy

In [None]:
# Load the expected-threat (xT) model stored at the path below; it is used by the
# per-game loop further down to rate actions.
# NOTE(review): the path is absolute and machine-specific, so this cell only runs on
# a machine where that file exists - consider a configurable, relative model directory.
xTModell = xthreat.load_model("/home/morten/Develop/packing-report/xT-impact/models/xT_full_data")

In [None]:
# WhoScored reader for the league / seasons handled by this notebook run.
# NOTE(review): data_dir and path_to_browser are absolute, machine-specific paths.
ws = sd.WhoScored(
    leagues="GER-Bundesliga2",
    seasons=[17, 18, 19, 20, 21],
    no_cache=False,
    no_store=False,
    data_dir=PosixPath("/home/morten/Develop/Open-Data/soccerdata"),
    path_to_browser="/usr/bin/chromium",
    headless=True,
)

# Game ids that are excluded when reading the events
# (presumably matches affected by COVID-19, judging by the name - TODO confirm).
corona_games = [
    1376730, 1376707, 1376732, 1376726, 1376733, 1376725, 1376729,
    1376716, 1376735, 1376734, 1376721, 1376722, 1376723, 1376728,
    1376724, 1376731, 1376727, 1376720, 1376718, 1376717, 1376719,
]

loader = ws.read_events(output_fmt='loader', skip=corona_games)

# Season ids as the loader expects them, one entry per season requested above.
loader_season_ids = ["1718", "1819", "1920", "2021", "2122"]

# One games table per season, stacked afterwards into a single frame.
all_games = [
    loader.games(competition_id="GER-Bundesliga2", season_id=season)
    for season in loader_season_ids
]
df_games = pd.concat(all_games)

# ClubElo reader; queried later for the Elo table on each match date.
ce = sd.ClubElo(
    no_cache=False,
    no_store=False,
    data_dir=PosixPath("/home/morten/Develop/Open-Data/soccerdata"),
)

# Drop the notebook's reference to the scraper; only `loader` and `ce` are used from here on.
ws = None

In [None]:
# Sanity check of the combined games table: print its (rows, columns) shape,
# then render the first rows.
print(df_games.shape)
display(df_games.head())

In [None]:
def get_top_league_elo(df_elo):
    """Return the highest mean Elo among a fixed set of leagues.

    Args:
        df_elo: frame with a 'league' column and an 'elo' column.

    Returns:
        The largest per-league mean of 'elo' over the leagues listed below.
        Leagues whose mean is not greater than zero (including leagues that
        do not occur in ``df_elo``, whose mean is NaN) are ignored; if no
        league qualifies, 0 is returned.
    """
    tracked_leagues = (
        "GER-Bundesliga",
        "GER-Bundesliga2",
        "ENG-Premier League",
        "ESP-La Liga",
        "FRA-Ligue 1",
        "ITA-Serie A",
    )
    mean_elos = (
        np.mean(df_elo.loc[df_elo['league'] == league_name, 'elo'])
        for league_name in tracked_leagues
    )
    # NaN fails the "> 0" comparison, so missing leagues drop out here.
    return max((mean_elo for mean_elo in mean_elos if mean_elo > 0), default=0)

In [None]:
keeper_actions = ["keeper_save", "keeper_claim", "keeper_punch"]  # "keeper_pick_up" is left out
defensive_actions = ["tackle", "interception", "clearance"]  # "keeper_save" is rated via keeper_actions

# Numeric SPADL ids used by the filters below. The names mirror how the rows are
# used in this cell (shots / goals / own goals).
# NOTE(review): confirm the values against socceraction.spadl.config.
_SHOT_TYPE_ID = 11
_SUCCESS_RESULT_ID = 1
_OWN_GOAL_RESULT_ID = 3


def _successful(df_side, type_names):
    """Return the rows of `df_side` whose type_name is in `type_names` and whose result is success."""
    df_selected = df_side[df_side['type_name'].isin(type_names)]
    return df_selected[df_selected['result_id'] == _SUCCESS_RESULT_ID]


def _add_ratings(people, player_ids, ratings, metric, positive_only=False):
    """Add the rating at position i to the running `metric` total of the i-th acting player.

    With `positive_only`, ratings that are not greater than zero contribute 0.
    """
    for pos, acting_player in enumerate(player_ids):
        rating = ratings[pos]
        if positive_only and not rating > 0:
            rating = 0
        people[acting_player][metric] += rating


def _goal_involvement(player, goal_minutes, goal_team_ids, goal_is_own, match_minutes):
    """Return goals-for minus goals-against that fell while `player` is assumed to be on the pitch.

    Starters are assumed to play from kick-off for `minutes`; non-starters are
    assumed to play the final `minutes` of a match lasting `match_minutes`.
    """
    net = 0
    for minute, scorer_team_id, is_own in zip(goal_minutes, goal_team_ids, goal_is_own):
        if player['is_starter']:
            on_pitch = player['minutes'] >= minute
        else:
            on_pitch = minute >= (match_minutes - player['minutes'])
        if not on_pitch:
            continue
        same_team = bool(scorer_team_id == player['team_id'])
        # A regular goal helps the scorer's team, an own goal helps the other team.
        net += 1 if same_team != bool(is_own) else -1
    return net


for game_id in tqdm(df_games['game_id']):
    # Per-game lookups, done once instead of once per use.
    game_row = df_games[df_games['game_id'] == game_id]
    home_team_id = game_row.home_team_id.values[0]
    away_team_id = game_row.away_team_id.values[0]
    competition_id = game_row['competition_id'].values[0]
    game_date = np.datetime_as_string(game_row['game_date'].values[0])[:10]  # 'YYYY-MM-DD'

    # receive dataframes
    df_teams = loader.teams(game_id=game_id)
    df_players = loader.players(game_id=game_id)
    # Events without a player cannot be attributed to anybody, so they are dropped.
    df_events = loader.events(game_id=game_id).dropna(subset=["player_id"])

    # Convert to SPADL actions and attach the action type names and player names.
    df_actions = (
        spadl.opta.convert_to_actions(df_events, home_team_id)
        .merge(spadl.actiontypes_df())
        .merge(df_players[['player_name', "player_id"]])
    )
    # Two orientations of the same actions: one normalised w.r.t. the home team,
    # one w.r.t. the away team.
    df_actions_ltr = spadl.play_left_to_right(df_actions, home_team_id)
    df_actions_rtl = spadl.play_left_to_right(df_actions, away_team_id)

    # defensive actions (rated once per orientation; both ratings are summed into xD)
    df_all_defense_pressing = _successful(df_actions_ltr, defensive_actions)
    df_all_defense_normal = _successful(df_actions_rtl, defensive_actions)
    xt_defense_ratings_pressing = xTModell.rate_defensive(df_all_defense_pressing.reset_index())
    xt_defense_ratings_normal = xTModell.rate_defensive(df_all_defense_normal.reset_index())
    # attacking actions
    df_attacking_actions = xthreat.get_successful_move_actions(df_actions_ltr)
    xt_attacking_ratings = xTModell.rate(df_attacking_actions)
    # goalie actions
    df_keeper = _successful(df_actions_rtl, keeper_actions)
    xt_keeper = xTModell.rate_defensive(df_keeper.reset_index())
    # xG actions
    # NOTE(review): only type_id 11 is selected here, so other shot types (if any exist
    # in the action vocabulary) count neither for xG nor for goal involvement - confirm intent.
    df_shot_actions = df_actions_ltr[df_actions_ltr['type_id'] == _SHOT_TYPE_ID]
    xG_ratings = xTModell.rate_xG(df_shot_actions)
    # GI actions
    df_goals = df_shot_actions[df_shot_actions['result_id'] == _SUCCESS_RESULT_ID]
    df_own_goals = df_actions_ltr[df_actions_ltr['result_id'] == _OWN_GOAL_RESULT_ID]
    df_all_goals = pd.concat([df_goals, df_own_goals], ignore_index=True)

    # create people dict: one record per listed player, metrics start at zero
    people_dict = {}
    for people_id in df_players['player_id']:
        player_rows = df_players[df_players['player_id'] == people_id]
        minutes_played = player_rows['minutes_played'].values[0]
        player_team_id = player_rows['team_id'].values[0]
        people_dict[people_id] = {
            'xT': 0,
            'xD': 0,
            'xK': 0,
            'xG': 0,
            'gI': 0,
            'is_starter': player_rows['is_starter'].values[0],
            'played': bool(minutes_played != 0),
            'minutes': minutes_played,
            'name': player_rows['player_name'].values[0],
            'team_id': player_team_id,
            # the opponent is the first team of the game that is not the player's own
            'opp_team_name': df_teams[df_teams['team_id'] != player_team_id]['team_name'].values[0],
            'game_date': game_date,
        }

    # create team dict: totals of the starters and, separately, of the non-starters
    team_dict = {}
    for team_id in df_teams['team_id']:
        team_dict[team_id] = {
            'accumulated_xT': 0,
            'accumulated_xD': 0,
            'accumulated_xG': 0,
            'accumulated_xK': 0,
            'accumulated_xT_sub': 0,
            'accumulated_xD_sub': 0,
            'accumulated_xG_sub': 0,
            'accumulated_xK_sub': 0,
        }

    # value attacking actions
    # TODO no negative values?! (only ratings > 0 are counted for xT)
    _add_ratings(people_dict, df_attacking_actions['player_id'], xt_attacking_ratings, 'xT', positive_only=True)
    # value defensive actions
    _add_ratings(people_dict, df_all_defense_pressing['player_id'], xt_defense_ratings_pressing, 'xD')
    _add_ratings(people_dict, df_all_defense_normal['player_id'], xt_defense_ratings_normal, 'xD')
    # value goalie
    _add_ratings(people_dict, df_keeper['player_id'], xt_keeper, 'xK')
    # value xG
    _add_ratings(people_dict, df_shot_actions['player_id'], xG_ratings, 'xG')

    # value gI
    # Goal minute = 45 minutes per completed period plus the minutes elapsed in the period.
    goal_minutes = (df_all_goals['period_id'].values - 1) * 45 + df_all_goals['time_seconds'].values / 60
    goal_team_ids = df_all_goals['team_id'].values
    # Own goals are recognised by the same criterion that selected them (result_id),
    # so an own goal stored with the shot type is not mistaken for a regular goal.
    goal_is_own = df_all_goals['result_id'].values == _OWN_GOAL_RESULT_ID
    # The longest playing time in the game serves as the match length.
    match_minutes = max(df_players['minutes_played']) if len(df_players) else 0
    for player_id in df_players['player_id']:
        people_dict[player_id]['gI'] += _goal_involvement(
            people_dict[player_id], goal_minutes, goal_team_ids, goal_is_own, match_minutes
        )

    # team values
    for player in people_dict.values():
        suffix = '' if player['is_starter'] else '_sub'
        team_totals = team_dict[player['team_id']]
        for metric in ('xT', 'xD', 'xK', 'xG'):
            team_totals[f'accumulated_{metric}{suffix}'] += player[metric]

    # Elo context: identical for every player of the game, so it is read once per game.
    ce_df = ce.read_by_date(game_date)
    league_mean = np.mean(ce_df[ce_df['league'] == competition_id].elo)
    top_league = get_top_league_elo(ce_df)

    # create Proto files
    for player_id, player in people_dict.items():
        if not player['played']:
            # only handle player if he played
            continue
        pb_path = f"../data/{str(player_id)}.pb"
        if not os.path.exists(pb_path):
            # create new proto obj
            proto_player = Player()
            proto_player.player_id = player_id
            proto_player.player_name = player['name']
            # add new player to csv
            pd.DataFrame({"player_name": [player['name']], "id": [player_id]}).to_csv("../data/player_db.csv", mode='a', header=False, index=False, sep=';')
        else:
            # the context manager closes the handle again after reading
            with open(pb_path, "rb") as pb_file:
                proto_player = Player().parse(pb_file.read())
        # NOTE(review): raises KeyError if the opponent's name is not in the ClubElo index.
        team_elo = ce_df.loc[player['opp_team_name']].elo
        # create game
        player_game = Game()
        player_game.game_id = game_id
        player_game.game_date = player['game_date']
        player_game.minutes_played = player['minutes']
        player_game.starter = player['is_starter']
        player_game.team = player['team_id']
        player_game.league_elo = league_mean / top_league
        player_game.opposition_elo = (team_elo / league_mean)
        # absolute values and per-minute values (minutes_played is non-zero for everyone who played)
        player_game.x_t = player['xT']
        player_game.x_t_pm = player_game.x_t / player_game.minutes_played
        player_game.x_d = player['xD']
        player_game.x_d_pm = player_game.x_d / player_game.minutes_played
        player_game.x_k = player['xK']
        player_game.x_k_pm = player_game.x_k / player_game.minutes_played
        player_game.x_g = player['xG']
        player_game.x_g_pm = player_game.x_g / player_game.minutes_played
        player_game.g_i = player['gI']
        player_game.g_i_pm = player_game.g_i / player_game.minutes_played
        # starters and non-starters are stored in separate game lists
        if player['is_starter']:
            proto_player.starter.x_i.append(player_game)
        else:
            proto_player.sub.x_i.append(player_game)

        with open(pb_path, "wb") as f:
            f.write(bytes(proto_player))


In [None]:
# data
# eng_1 17, 18, 19, 20, 21
# esp_1 17, 18, 19, 20, 21
# fra_1 17, 18, 19, 20, 21
# ita_1 17, 18, 19, 20, 21
# ger_1 17, 18, 19, 20, 21
# ger_2 17, 18, 19, 20, 21