In [None]:
import sys
sys.path.append('../')
from proto_files.player import Player

import socceraction.spadl as spadl
import socceraction.xthreat as xthreat
import soccerdata as sd
from pathlib import PosixPath
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

import socceraction.spadl.config as spadlconfig
from socceraction.spadl.schema import SPADLSchema
from pandera.typing import DataFrame, Series
import numpy.typing as npt
from sklearn.exceptions import NotFittedError
from typing import Callable, List, Optional, Tuple
import os
from datetime import date, timedelta
from ipywidgets import interact
import math
from oddCalculation import OddToPercentage
import json

In [None]:
# Game ids handed to `read_events(skip=...)` below so they are left out of the loader.
# NOTE(review): presumably fixtures disrupted by COVID-19, judging by the name — confirm.
corona_games = [
    1376730, 1376707, 1376732, 1376726, 1376733, 1376725, 1376729,
    1376716, 1376735, 1376734, 1376721, 1376722, 1376723, 1376728,
    1376724, 1376731, 1376727, 1376720, 1376718, 1376717, 1376719,
]

# WhoScored reader for a single league/season, run through a headless Chromium
# and pointed at a local data directory.
ws = sd.WhoScored(
    leagues="FRA-Ligue 1",
    seasons=[21],
    no_cache=False,
    no_store=False,
    data_dir=PosixPath("/home/morten/Develop/Open-Data/soccerdata"),
    path_to_browser="/usr/bin/chromium",
    headless=True,
)
loader = ws.read_events(output_fmt='loader', skip=corona_games)

# Season ids as the loader expects them; one games frame is collected per
# season and the frames are stacked into `df_games`.
loader_season_ids = ["2122"]
all_games = []
for s_id in loader_season_ids:
    season_games = loader.games(competition_id="FRA-Ligue 1", season_id=s_id)
    all_games.append(season_games)

df_games = pd.concat(all_games)
# League identifiers listed by the original author:
# ["GER-Bundesliga", "GER-Bundesliga2", "ENG-Premier League", "ESP-La Liga", "FRA-Ligue 1", "ITA-Serie A"]
# NOTE(review): presumably the leagues this notebook is run for, with the CSV
# below swapped to match — confirm.
betting_data = pd.read_csv("betting-data/F1.csv", sep=',')

# Each key of the JSON mapping is the spelling to keep; its value lists the
# alternative spellings that get replaced by that key.
# The file is read inside a `with` block so the handle is closed immediately
# (the previous `json.load(open(...))` left it open until garbage collection).
with open("/home/morten/soccerdata/config/teamname_replacements.json", encoding="utf-8") as replacements_file:
    name_substitutes = json.load(replacements_file)

# Replacements are applied one alias at a time, in file order, across the
# whole frame (every column, not only the team-name columns).
for canonical_name, aliases in name_substitutes.items():
    for alias in aliases:
        betting_data.replace(alias, canonical_name, inplace=True)

In [None]:
# Show the distinct home-team spellings left in the betting data after the replacements.
betting_data["HomeTeam"].unique()

In [None]:
def get_odds_by_names(home, away):
    """Return the normalised Bet365 home/draw/away values for one fixture.

    Looks up the first row of the module-level ``betting_data`` frame whose
    ``HomeTeam`` and ``AwayTeam`` match, converts its ``B365H`` / ``B365D`` /
    ``B365A`` odds with ``OddToPercentage.odd_to_percentage`` and divides each
    returned percentage by the sum of the three returned probabilities, which
    removes the bookmaker's margin.

    NOTE(review): the *percentages* are divided by the sum of the
    *probabilities*; the scale of the result therefore depends on what
    ``odd_to_percentage`` returns, which is not visible here — confirm.

    Args:
        home: Home team name as spelled in ``betting_data["HomeTeam"]``.
        away: Away team name as spelled in ``betting_data["AwayTeam"]``.

    Returns:
        A list ``[home, draw, away]`` of the normalised values.

    Raises:
        IndexError: If ``betting_data`` holds no row for this pairing. The
            message names both teams so a spelling mismatch is easy to spot.
    """
    fixture_mask = (betting_data["HomeTeam"] == home) & (betting_data["AwayTeam"] == away)
    betting_game_entry = betting_data[fixture_mask]
    if betting_game_entry.empty:
        # Same exception type the bare `.values[0]` lookup would raise, but
        # with enough context to identify the offending fixture.
        raise IndexError(f"no betting entry found for {home!r} vs {away!r}")

    # get Bet365 Values
    home_odds = betting_game_entry["B365H"].values[0]
    draw_odds = betting_game_entry["B365D"].values[0]
    away_odds = betting_game_entry["B365A"].values[0]

    o2p = OddToPercentage()
    home_prob, home_perc = o2p.odd_to_percentage(home_odds)
    draw_prob, draw_perc = o2p.odd_to_percentage(draw_odds)
    away_prob, away_perc = o2p.odd_to_percentage(away_odds)

    # make them fair: rescale by the summed probabilities of the three outcomes
    bookie_sum = home_prob + draw_prob + away_prob
    return [home_perc / bookie_sum, draw_perc / bookie_sum, away_perc / bookie_sum]

In [None]:
# Output metric key -> column holding that metric in a player's per-game records.
# The key order is also the order in which the metrics are accumulated.
_METRIC_COLUMNS = {"xT": "x_t", "xG": "x_g", "xD": "x_d", "xK": "x_k"}


def _load_player(player_id):
    """Parse the serialized ``Player`` message stored for ``player_id``.

    The file is read inside a ``with`` block so its handle is closed right
    away; the loop below opens one file per player per game.
    """
    with open(f"./../data/{str(player_id)}.pb", "rb") as proto_file:
        return Player().parse(proto_file.read())


def _records_for_date(x_i, game_date):
    """Return the rows of ``x_i`` (as a DataFrame, sorted by date) dated ``game_date``."""
    records = pd.DataFrame(x_i).sort_values('game_date')
    return records[records['game_date'] == game_date]


def _accumulate(totals, records):
    """Add the metrics of the first row of ``records`` onto ``totals`` in place.

    NaN values count as 0. Raises IndexError when ``records`` is empty.
    """
    for metric, column in _METRIC_COLUMNS.items():
        value = records[column].values[0]
        totals[metric] += value if not math.isnan(value) else 0


# For every game: sum the per-player metrics separately for home/away and
# starters/substitutes, attach ELO, score and bookmaker values, and append one
# row to the CSV. The file is opened in append mode without a header, so
# re-running this cell adds the same rows again.
for game_id in tqdm(df_games['game_id']):
    df_teams = loader.teams(game_id=game_id)
    df_players = loader.players(game_id=game_id)
    df_game = df_games[df_games['game_id'] == game_id]
    home_id = df_game.home_team_id.values[0]
    away_id = df_game.away_team_id.values[0]
    # Keep only the leading YYYY-MM-DD part of the timestamp string.
    game_date = df_game.game_date.values[0].astype(str)[:10]
    home_score = df_game.home_score.values[0]
    away_score = df_game.away_score.values[0]
    home_team_name = df_teams[df_teams["team_id"] == home_id]['team_name'].values[0]
    away_team_name = df_teams[df_teams["team_id"] == away_id]['team_name'].values[0]
    # NOTE(review): hard-coded skip of a single fixture; the reason is not
    # visible in this notebook — confirm it is still needed.
    if home_team_name == "Torino" and away_team_name == "Fiorentina":
        continue
    bookie_h, bookie_d, bookie_a = get_odds_by_names(home_team_name, away_team_name)
    home_elo = None
    away_elo = None
    home = {role: dict.fromkeys(_METRIC_COLUMNS, 0) for role in ("starter", "subs")}
    away = {role: dict.fromkeys(_METRIC_COLUMNS, 0) for role in ("starter", "subs")}

    for player_id in df_players["player_id"]:
        player_row = df_players[df_players['player_id'] == player_id]
        if player_row.minutes_played.values[0] == 0:
            continue
        proto_player = _load_player(player_id)
        is_starter = player_row.is_starter.values[0]
        is_home = player_row.team_id.values[0] == home_id

        history = proto_player.starter if is_starter else proto_player.sub
        records = _records_for_date(history.x_i, game_date)

        if is_starter:
            # A starter's record carries the ELO of the team they faced, so a
            # home starter yields the away ELO and vice versa.
            if is_home:
                away_elo = records["opposition_elo"].values[0]
            else:
                home_elo = records["opposition_elo"].values[0]

        side_totals = home if is_home else away
        _accumulate(side_totals['starter' if is_starter else 'subs'], records)

    # Build the single output row; insertion order fixes the CSV column order.
    game_row = {}
    for side_label, side_totals in (("home", home), ("away", away)):
        for role_prefix, role in (("", "starter"), ("sub_", "subs")):
            for metric in ("xG", "xT", "xD", "xK"):
                game_row[f"{side_label}_{role_prefix}{metric}"] = [side_totals[role][metric]]
    game_row["home_elo"] = [home_elo]
    game_row["away_elo"] = [away_elo]
    game_row["home_score"] = [home_score]
    game_row["away_score"] = [away_score]
    game_row["bookie_home"] = [bookie_h]
    game_row["bookie_draw"] = [bookie_d]
    game_row["bookie_away"] = [bookie_a]

    pd.DataFrame(game_row).to_csv("data_game_values_test.csv", mode='a', header=False, index=False, sep=';')
    

In [None]:
# test 
# esp_1
# ger_1
# ger_2
# eng_1
# ita_1
# fra_1