In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

from IPython.display import Javascript, display, clear_output, HTML

display(HTML("<style>.container { width:100% !important;}</style>"))

import pandas as pd

pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
%run data_loading.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from typing import Literal

In [4]:
import numpy as np

In [5]:
fixtures_df = get_fixtures_df()
teams_df = get_teams_df()

In [6]:
cd ~/Documents/GitHub/predict_the_prem/

/Users/thomasrichardson/Documents/GitHub/predict_the_prem


In [7]:
from predict_the_prem.munge import ( 
    get_entries_position_df,
    get_entry_score_vs_game_week,
    get_league_table_df,
    get_team_fixtures_df,
    get_team_position_vs_game_week,
    merge_fixtures_and_teams,
)

In [8]:
fixtures_df = merge_fixtures_and_teams(fixtures_df=fixtures_df, teams_df=teams_df)

In [9]:
team_fixtures_df = get_team_fixtures_df(fixtures_df=fixtures_df)

In [10]:
league_table_df = get_league_table_df(team_fixtures_df=team_fixtures_df)

In [11]:
team_position_vs_game_week = get_team_position_vs_game_week(team_fixtures_df=team_fixtures_df)

In [12]:
entries = get_entries()

In [13]:
entries_position_df = get_entries_position_df(teams_df=teams_df, entries=entries)

In [14]:
# NOTE(review): despite the `get_..._df` name, this variable holds the call's
# result, not a function -- consider renaming it.
get_entry_score_vs_game_week_df = get_entry_score_vs_game_week(team_fixtures_df, entries_position_df)

In [15]:
fixtures_df.head().T

Unnamed: 0,0,1,2,3,4
global_match_id,2444470,2444473,2444471,2444472,2444474
match_id,1,4,2,3,5
game_week,1,1,1,1,1
is_finished,True,True,True,True,True
is_finished_provisional,True,True,True,True,True
match_start_time,2024-08-16 19:00:00+00:00,2024-08-17 11:30:00+00:00,2024-08-17 14:00:00+00:00,2024-08-17 14:00:00+00:00,2024-08-17 14:00:00+00:00
is_provisional_start_time,False,False,False,False,False
is_started,True,True,True,True,True
home_team_id,14,10,1,8,15
away_team_id,9,12,20,5,17


In [104]:
def _expand_stats(stats):
    stats = eval(stats)
    stats = pd.DataFrame.from_records(stats)
    row_data = {}
    for idx, row in stats.iterrows():
        identifier = row['identifier']
        if identifier in ["goals_scored", "own_goals"]:
            row_data[f'home_team_{identifier}_stats'] = row['h']
            row_data[f'away_team_{identifier}_stats'] = row['a']
    return row_data

In [105]:
# Expand each fixture's `stats` value into goal / own-goal columns.
# NOTE(review): `test` is a placeholder name that later cells read and update
# in place -- a descriptive name would make those cells easier to follow.
test = pd.DataFrame.from_records(fixtures_df["stats"].apply(_expand_stats))

In [106]:
test.head()

Unnamed: 0,home_team_goals_scored_stats,away_team_goals_scored_stats,home_team_own_goals_stats,away_team_own_goals_stats
0,"[{'value': 1, 'element': 389}]",[],[],[]
1,[],"[{'value': 1, 'element': 317}, {'value': 1, 'e...",[],[]
2,"[{'value': 1, 'element': 4}, {'value': 1, 'ele...",[],[],[]
3,[],"[{'value': 1, 'element': 113}, {'value': 1, 'e...",[],[]
4,"[{'value': 1, 'element': 403}]",[],[],[]


In [18]:
test[["home_team_goals_scored_stats", "away_team_goals_scored_stats"]].head().T

Unnamed: 0,0,1,2,3,4
home_team_goals_scored_stats,"[{'value': 1, 'element': 389}]",[],"[{'value': 1, 'element': 4}, {'value': 1, 'ele...",[],"[{'value': 1, 'element': 403}]"
away_team_goals_scored_stats,[],"[{'value': 1, 'element': 317}, {'value': 1, 'e...",[],"[{'value': 1, 'element': 113}, {'value': 1, 'e...",[]


In [172]:
from typing import List, Dict, Any

def _map_player_in_stat(stat: List[Dict[str, Any]], player_mapping_dict: Dict[int, str]):
    stat = [x if not x.get("element") else {**x, **{"player_name": player_mapping_dict.get(x.get("element"))}} for x in stat]
    return stat

In [173]:
# Attach player names to every goal / own-goal column. The away own-goals
# column is included too, so all four columns share the same entry schema and
# can be fed to formatters that read `player_name`.
goal_stat_columns = [
    "home_team_goals_scored_stats",
    "away_team_goals_scored_stats",
    "home_team_own_goals_stats",
    "away_team_own_goals_stats",
]
test[goal_stat_columns] = test[goal_stat_columns].map(
    _map_player_in_stat, player_mapping_dict=player_mapping_dict
)

In [174]:
test.head().to_dict()

{'home_team_goals_scored_stats': {0: [{'value': 1,
    'element': 389,
    'player_name': 'Elliott'}],
  1: [],
  2: [{'value': 1, 'element': 4, 'player_name': 'Setford'},
   {'value': 1, 'element': 17, 'player_name': 'Ødegaard'}],
  3: [],
  4: [{'value': 1, 'element': 403, 'player_name': 'Gvardiol'}]},
 'away_team_goals_scored_stats': {0: [],
  1: [{'value': 1, 'element': 317, 'player_name': 'Andersen'},
   {'value': 1, 'element': 328, 'player_name': 'Sessegnon'}],
  2: [],
  3: [{'value': 1, 'element': 113, 'player_name': 'Van den Berg'},
   {'value': 1, 'element': 136, 'player_name': 'Thiago'},
   {'value': 1, 'element': 148, 'player_name': 'F.Kadıoğlu'}],
  4: []},
 'home_team_own_goals_stats': {0: [], 1: [], 2: [], 3: [], 4: []},
 'away_team_own_goals_stats': {0: [], 1: [], 2: [], 3: [], 4: []}}

In [121]:
def format_goal_row(goals, own_goals):
    """Render one team's scorers as an HTML snippet, one player per line.

    Each entry in ``goals`` and ``own_goals`` is a mapping with
    ``'player_name'`` and ``'value'`` keys. A player gets one ball per unit of
    ``'value'``; own-goal balls are wrapped in a red ``<span>``. Regular goals
    are listed first, then own goals, separated by ``<br>``. Returns an em dash
    when both inputs are empty.
    """
    red_open = '<span style="color:red;">'
    red_close = '</span>'

    scorer_lines = [
        f"{scorer['player_name']} {'⚽' * scorer['value']}"
        for scorer in goals
    ]
    own_goal_lines = [
        f"{og['player_name']} {red_open + '⚽' * og['value'] + red_close}"
        for og in own_goals
    ]

    lines = scorer_lines + own_goal_lines
    if not lines:
        return '—'
    return '<br>'.join(lines)

# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated and emits a
# DeprecationWarning.
from IPython.display import display, HTML

# Build a two-column HTML table (row label, formatted home scorers) for the
# first ten rows of `test`.
html_rows = ['<table border="1" style="border-collapse:collapse;">',
             '<tr><th>Match</th><th>Home Team Scorers</th></tr>']
for i, row in test.head(10).iterrows():
    goals = row['home_team_goals_scored_stats']
    own_goals = row['home_team_own_goals_stats']
    html_rows.append(f"<tr><td>{i}</td><td>{format_goal_row(goals, own_goals)}</td></tr>")
html_rows.append('</table>')

# Combine into a single HTML string
html_table = '\n'.join(html_rows)

# Render the table in the notebook output
display(HTML(html_table))

  from IPython.core.display import display, HTML


Match,Home Team Scorers
0,Elliott ⚽
1,—
2,Setford ⚽ Ødegaard ⚽
3,—
4,Gvardiol ⚽
5,Heaven ⚽
6,Awoniyi ⚽
7,Adu-Adjei ⚽ Kayode ⚽ Eyestone ⚽
8,—
9,Bates ⚽


In [162]:
import pandas as pd
from IPython.display import display, HTML

# Inline SVG markup for one icon, wrapped in a function so the fill colour can vary.
def svg_football(color='black'):
    """Return an inline ``<svg>`` snippet whose path is filled with ``color``.

    The element is declared with ``width="20" height="20"`` over a
    ``0 0 60 60`` viewBox and is styled ``inline-block`` with bottom vertical
    alignment. ``color`` is interpolated verbatim into the path's ``fill``
    attribute. The returned string starts and ends with a newline plus
    indentation whitespace, exactly as written in the template below.
    """
    return f'''
    <svg width="20" height="20" viewBox="0 0 60 60" 
         style="display:inline-block; vertical-align:bottom;  margin:0.1px;">
        <path fill="{color}" fill-opacity="1" stroke-width="0.2" stroke-linejoin="round"
        d="M 43.5,21.0285L 48.6842,25.394L 50.3689,24.9228C 48.4338,23.092 46.0926,21.6859 43.4946,20.854L 43.5,21.0285 Z 
        M 53.6014,39.3315L 52.5714,45.5285L 53.8558,46.5275C 55.221,43.9944 55.9969,41.0968 56,38.0182L 53.6014,39.3315 Z 
        M 43.8214,48.7596L 38.3871,44.894L 32.1728,47.2255L 30.6427,53.6725L 32.5593,55.1631C 34.2758,55.7068 36.1036,56 38,56C 
        39.7036,56 41.3519,55.7633 42.9138,55.3211L 43.8214,48.7596 Z 
        M 29.5413,53.4566L 31.0714,47.0096L 25.3871,42.644L 21.0897,44.182C 22.5587,48.1994 25.4186,51.5469 29.0863,53.6415L 29.5413,53.4566 Z 
        M 25.5413,41.5626L 26.3214,34.1156L 21.5554,30.6696C 20.5557,32.9087 20,35.3894 20,38C 20,39.8384 20.2756,41.6124 20.7877,43.2829L 25.5413,41.5626 Z 
        M 27.0056,33.644L 33.22,31.0625L 34.5,24.3656L 30.7276,21.5296C 26.8969,23.2235 23.768,26.2146 21.898,29.946L 27.0056,33.644 Z 
        M 51.7199,45.2066L 52.7499,39.0096L 46.8157,34.394L 40.3514,36.7255L 39.3214,44.1725L 44.7556,48.038L 51.7199,45.2066 Z 
        M 46.4699,33.4566L 47.4999,26.0096L 42.3157,21.6441L 35.3514,24.4755L 34.0714,31.1725L 40.0056,35.788L 46.4699,33.4566 Z 
        M 38,18C 49.0457,18 58,26.9543 58,38C 58,49.0457 49.0457,58 38,58C 26.9543,58 18,49.0457 18,38C 18,26.9543 26.9543,18 38,18 Z" />
    </svg>
    '''

# Example dataset: hand-written goal / own-goal entries keyed by row label.
# NOTE(review): the `df` built from this dict is not referenced anywhere in the
# visible cell -- the table below iterates over `test` instead. Confirm whether
# this sample is still needed.
data = {
    'home_team_goals_scored_stats': {
        0: [{'value': 1, 'element': 389, 'player_name': 'Elliott'}],
        1: [],
        2: [{'value': 4, 'element': 17, 'player_name': 'Ødegaard'}],
        3: [],
        4: [{'value': 1, 'element': 403, 'player_name': 'Gvardiol'}]
    },
    'home_team_own_goals_stats': {
        0: [{'value': 1, 'element': 999, 'player_name': 'OwnGoalMan'}],
        1: [], 2: [], 3: [], 4: []
    },
}

df = pd.DataFrame(data)

# Turn one cell's goal entries into HTML: one line per player, one SVG icon per goal.
def format_goals(goals, color='black'):
    """Format a list of goal entries as ``<br>``-separated HTML lines.

    Each entry is a mapping with ``'player_name'`` and ``'value'`` keys. A
    line is the player's name, a space, then ``svg_football(color)`` repeated
    ``'value'`` times. Returns an em dash when ``goals`` is empty or otherwise
    falsy.
    """
    if not goals:
        return '—'

    lines = []
    for entry in goals:
        label = entry['player_name']
        icons = [svg_football(color) for _ in range(entry['value'])]
        lines.append(f"{label} {''.join(icons)}")
    return '<br>'.join(lines)

# Render the first ten rows of `test` as an HTML table with three columns:
# row label, home goals (black icons) and home own goals (red icons).
html = [
    '<table border="1" style="border-collapse:collapse; font-family:sans-serif;">',
    '<tr><th>Match</th><th>Home Goals</th><th>Home Own Goals</th></tr>',
]

for match_idx, match_row in test.head(10).iterrows():
    cells = (
        match_idx,
        format_goals(match_row['home_team_goals_scored_stats'], 'black'),
        format_goals(match_row['home_team_own_goals_stats'], 'red'),
    )
    html.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")

html.append('</table>')
display(HTML(''.join(html)))


Match,Home Goals,Home Own Goals
0,Elliott,—
1,—,—
2,Setford Ødegaard,—
3,—,—
4,Gvardiol,—
5,Heaven,—
6,Awoniyi,—
7,Adu-Adjei Kayode,Eyestone
8,—,—
9,Bates,—


In [169]:
# Lookup of player_id -> player_name.
# NOTE(review): `players_df` is assigned in cells that appear further down in
# this notebook, so this cell fails on a fresh top-to-bottom run.
player_mapping_dict = players_df.set_index("player_id")["player_name"].to_dict()

In [62]:
def get_players_df(timeout: float = 30.0) -> pd.DataFrame:
    """Download the bootstrap payload and return a slim player lookup frame.

    Relies on ``base_url`` and ``requests`` already being available in the
    notebook namespace (neither is defined in this cell).

    Parameters
    ----------
    timeout : float, default 30.0
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    pd.DataFrame
        One row per item of the response's ``"elements"`` list, with exactly
        the columns ``global_player_id`` (from ``code``), ``player_id`` (from
        ``id``) and ``player_name`` (from ``web_name``).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    renaming_dict = {
        "code": "global_player_id",
        "id": "player_id",
        "web_name": "player_name",
    }
    # Build the URL with plain string handling: os.path.join uses the OS path
    # separator, which is a backslash on Windows and would corrupt the URL.
    bootstrap_url = f"{base_url.rstrip('/')}/bootstrap-static/"
    bootstrap_response = requests.get(bootstrap_url, timeout=timeout)
    # Fail here on an HTTP error instead of later with an opaque JSON/KeyError.
    bootstrap_response.raise_for_status()
    players_df = pd.DataFrame(bootstrap_response.json()["elements"])
    return players_df.rename(columns=renaming_dict)[list(renaming_dict.values())]

In [63]:
players_df = get_players_df()

In [71]:
players_df[["player_id", "player_name"]].set_index("player_id").to_dict()["player_name"]

{1: 'Raya',
 2: 'Arrizabalaga',
 3: 'Hein',
 4: 'Setford',
 5: 'Gabriel',
 6: 'Saliba',
 7: 'Calafiori',
 8: 'J.Timber',
 9: 'Kiwior',
 10: 'Lewis-Skelly',
 11: 'White',
 12: 'Zinchenko',
 13: 'Clarke',
 14: 'Kacurri',
 15: 'Nichols',
 16: 'Saka',
 17: 'Ødegaard',
 18: 'Madueke',
 19: 'Martinelli',
 20: 'Trossard',
 21: 'Rice',
 22: 'Merino',
 23: 'Fábio Vieira',
 24: 'Nørgaard',
 25: 'Nwaneri',
 26: 'Zubimendi',
 27: 'Nelson',
 28: 'Kabia',
 29: 'Sambi',
 30: 'Havertz',
 31: 'G.Jesus',
 662: 'Mosquera',
 666: 'Gyökeres',
 32: 'Martinez',
 33: 'M.Bizot',
 34: 'Gauci',
 35: 'Marschall',
 36: 'Cash',
 37: 'Digne',
 38: 'Konsa',
 39: 'Maatsen',
 40: 'Mings',
 41: 'Pau',
 42: 'A.García',
 43: 'Alex Moreno',
 44: 'Bogarde',
 45: 'Sousa',
 46: 'Yasin',
 47: 'Rogers',
 48: 'Tielemans',
 49: 'Bailey',
 50: 'Buendía',
 51: 'Iling Jr',
 52: 'J.Ramsey',
 53: 'Malen',
 54: 'McGinn',
 55: 'Barkley',
 56: 'Barrenechea',
 57: 'Dobbin',
 58: 'Kamara',
 59: 'Onana',
 60: 'Broggio',
 61: 'Dendoncker',
 

In [32]:
# Build the URL with plain string handling: os.path.join uses the OS path
# separator, which is a backslash on Windows and would corrupt the URL.
bootstrap_url = f"{base_url.rstrip('/')}/bootstrap-static/"
bootstrap_response = requests.get(bootstrap_url, timeout=30)
# Stop here on an HTTP error rather than failing later while parsing the body.
bootstrap_response.raise_for_status()

In [35]:
# NOTE(review): this rebinds `players_df`, which another cell assigns from
# get_players_df(), to the full "elements" frame with its original column names.
players_df = pd.DataFrame(bootstrap_response.json()["elements"])

In [36]:
# Preview the twenty rows with the lowest `selected_rank_type`, showing only
# the name and rank columns.
players_df.sort_values("selected_rank_type")[["web_name", "selected_rank_type"]].head(20)

Unnamed: 0,web_name,selected_rank_type
620,Wan-Bissaka,1
139,Kelleher,1
386,M.Salah,1
248,João Pedro,1
506,Isak,2
234,Palmer,2
375,Frimpong,2
0,Raya,2
509,Sels,3
32,Gyökeres,3
