In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import asyncio
import json
import aiohttp
from understat import Understat
import nest_asyncio

In [16]:
def players(league="epl", season=2024):
    """Fetch per-player season statistics for one league from Understat.

    Parameters
    ----------
    league : str
        League name handed to ``Understat.get_league_players``.
        Defaults to ``"epl"``.
    season : int or str
        Season handed to ``Understat.get_league_players``. Defaults to ``2024``.
        NOTE(review): the recorded outputs in this notebook show 2023/24 data
        (matches dated from 2023-08-11, shots with season 2023) - confirm that
        2024 is still the season you want when re-running.

    Returns
    -------
    list of dict
        The player records exactly as returned by ``get_league_players``.
    """
    # Patch asyncio so asyncio.run() can be used while the notebook's own
    # event loop is already running.
    nest_asyncio.apply()

    async def fetch_league_players():
        async with aiohttp.ClientSession() as session:
            return await Understat(session).get_league_players(league, season)

    return asyncio.run(fetch_league_players())

In [17]:
# Download the player records (this performs a network request to Understat).
e = players()

In [18]:
# Peek at the first few records instead of printing the entire list.
e[:3]

[{'id': '1250',
  'player_name': 'Mohamed Salah',
  'games': '20',
  'time': '1768',
  'goals': '14',
  'xG': '13.610325902700424',
  'assists': '8',
  'xA': '9.57847910746932',
  'shots': '65',
  'key_passes': '47',
  'yellow_cards': '2',
  'red_cards': '0',
  'position': 'F',
  'team_title': 'Liverpool',
  'npg': '10',
  'npxG': '9.043387070298195',
  'xGChain': '19.68034301698208',
  'xGBuildup': '5.720102474093437'},
 {'id': '8260',
  'player_name': 'Erling Haaland',
  'games': '15',
  'time': '1305',
  'goals': '14',
  'xG': '16.727499932050705',
  'assists': '4',
  'xA': '3.2472310978919268',
  'shots': '58',
  'key_passes': '16',
  'yellow_cards': '1',
  'red_cards': '0',
  'position': 'F',
  'team_title': 'Manchester City',
  'npg': '11',
  'npxG': '13.68282476067543',
  'xGChain': '16.214465029537678',
  'xGBuildup': '1.3705047406256199'},
 {'id': '453',
  'player_name': 'Son Heung-Min',
  'games': '20',
  'time': '1739',
  'goals': '12',
  'xG': '7.47495724260807',
  'assists

In [20]:
# Build the player table directly from the list of records. pandas matches
# each value to its column by key, so a record with a different key order
# cannot end up in the wrong column, and an empty list gives an empty frame
# instead of an IndexError.
# NOTE: Understat delivers every field as a string (e.g. 'goals': '14'), so
# convert with pd.to_numeric before doing arithmetic on these columns.
players_df = pd.DataFrame(e)

# Column names, kept under this name in case later cells refer to it.
cols = list(players_df.columns)

In [21]:
# Preview the player table (one row per player record).
players_df

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,1250,Mohamed Salah,20,1768,14,13.610325902700424,8,9.57847910746932,65,47,2,0,F,Liverpool,10,9.043387070298195,19.68034301698208,5.720102474093437
1,8260,Erling Haaland,15,1305,14,16.727499932050705,4,3.2472310978919268,58,16,1,0,F,Manchester City,11,13.68282476067543,16.214465029537678,1.3705047406256199
2,453,Son Heung-Min,20,1739,12,7.47495724260807,5,6.843130592256784,52,37,1,0,F M,Tottenham,11,6.713788405060768,16.887082293629646,6.178518561646342
3,1679,Dominic Solanke,19,1708,12,12.378114826977253,1,0.9976950995624065,66,14,1,0,F,Bournemouth,11,11.61694597452879,12.262300044298172,1.8974752612411976
4,1776,Jarrod Bowen,19,1710,11,8.29572493582964,2,3.144163405522704,47,14,1,0,F M,West Ham,11,8.29572493582964,9.910744827240705,1.9265567287802696
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,12204,Benicio Baker-Boaitey,2,2,0,0,0,0,0,0,0,0,S,Brighton,0,0,0.1531512588262558,0.1531512588262558
501,12215,Michael Ndiweni,1,1,0,0,0,0,0,0,0,0,S,Newcastle United,0,0,0,0
502,12251,Sydie Peck,1,6,0,0,0,0,0,0,0,0,S,Sheffield United,0,0,0,0
503,12275,Willy Kambwala,2,88,0,0,0,0,0,0,0,0,D S,Manchester United,0,0,0.07591637223958969,0.07591637223958969


In [43]:
async def league_table(league="EPL", season="2024"):
    """Fetch a league standings table from Understat as a DataFrame.

    Parameters
    ----------
    league : str
        League name handed to ``Understat.get_league_table``.
        Defaults to ``"EPL"``.
    season : str or int
        Season handed to ``Understat.get_league_table``.
        Defaults to ``"2024"``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the returned table; the first returned row is
        used as the column header.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        table = await understat.get_league_table(league, season)
    return pd.DataFrame(table[1:], columns=table[0])


# Patch asyncio so asyncio.run() works inside the notebook's running event
# loop even when the players() cell has not been executed in this kernel.
nest_asyncio.apply()
league_table_df = asyncio.run(league_table())

# Number the rows 1..N from the actual row count, so a league that does not
# have exactly 20 teams no longer raises a length-mismatch error.
# NOTE(review): assumes the rows arrive in standings order - the recorded
# output is in descending PTS; confirm before relying on "Position".
league_table_df["Position"] = np.arange(1, len(league_table_df) + 1)


In [44]:
# Preview the standings table, including the added "Position" column.
league_table_df

Unnamed: 0,Team,M,W,D,L,G,GA,PTS,xG,NPxG,xGA,NPxGA,NPxGD,PPDA,OPPDA,DC,ODC,xPTS,Position
0,Liverpool,20,13,6,1,43,18,45,46.31,41.46,24.88,24.12,17.34,8.25,16.57,242,117,38.79,1
1,Aston Villa,20,13,3,4,43,27,42,38.53,35.48,27.82,27.82,7.66,12.59,10.36,201,128,35.42,2
2,Manchester City,19,12,4,3,45,21,40,42.79,38.98,18.5,16.98,22.0,11.69,21.05,220,82,40.81,3
3,Arsenal,20,12,4,4,37,20,40,40.56,35.99,19.71,18.19,17.8,9.1,16.76,227,91,39.24,4
4,Tottenham,20,12,3,5,42,29,39,37.68,36.92,36.66,32.86,4.06,8.3,10.93,270,145,29.43,5
5,West Ham,20,10,4,6,33,30,34,29.41,27.89,36.49,33.44,-5.56,17.47,9.88,90,218,23.88,6
6,Brighton,20,8,7,5,38,33,31,36.65,33.61,31.77,27.97,5.64,9.18,17.07,219,179,30.5,7
7,Manchester United,20,10,1,9,22,27,31,29.3,27.0,35.56,34.04,-7.04,11.72,13.68,178,164,23.78,8
8,Newcastle United,20,9,2,9,39,29,29,43.62,40.58,33.92,31.51,9.06,9.97,11.72,175,129,34.59,9
9,Chelsea,20,8,4,8,34,31,28,41.96,36.63,30.57,28.27,8.36,8.91,14.25,207,126,34.39,10


In [45]:
async def get_player_shots(player_id):
    """Fetch the shots recorded for one player.

    Parameters
    ----------
    player_id : str or int
        Understat player id handed to ``Understat.get_player_shots``.

    Returns
    -------
    str
        The shot records serialised with ``json.dumps``.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        shots = await understat.get_player_shots(player_id)
    return json.dumps(shots)


async def get_all_player_shots(player_ids):
    """Fetch the shots of several players over one shared HTTP session.

    Requests are awaited one after another, so the result order follows
    ``player_ids``. Sharing the session avoids opening a new session and a
    new event loop for every player.

    Parameters
    ----------
    player_ids : iterable
        Understat player ids.

    Returns
    -------
    list
        One entry per player id, each being whatever
        ``Understat.get_player_shots`` returned for that player.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        return [await understat.get_player_shots(pid) for pid in player_ids]


# Patch asyncio so asyncio.run() works inside the notebook's running event
# loop even when the players() cell has not been executed in this kernel.
nest_asyncio.apply()

players_id = list(players_df["id"])

# One entry per player, in the same order as players_id.
total_shots = asyncio.run(get_all_player_shots(players_id))

In [46]:
def flatten_extend(matrix):
    """Concatenate the rows of ``matrix`` into a single flat list.

    Only one level of nesting is removed: every row is iterated once and its
    items are appended in order.
    """
    return [item for row in matrix for item in row]

# Collapse the per-player lists into one flat list of shot records.
# The guard keeps this cell safe to re-run: once total_shots is already flat
# its items are dicts, and flattening again would iterate each dict and fill
# the list with its keys instead of shot records.
if total_shots and isinstance(total_shots[0], list):
    total_shots = flatten_extend(total_shots)

In [47]:
# Build the shot table directly from the list of shot records. pandas matches
# each value to its column by key, and an empty list gives an empty frame
# instead of an IndexError on total_shots[0].
total_player_shots_df = pd.DataFrame(total_shots)

# Column names, kept under this name in case later cells refer to it.
cols2 = list(total_player_shots_df.columns)

In [48]:
# Parse the timestamps so they can be compared as dates.
total_player_shots_df["date"] = pd.to_datetime(total_player_shots_df["date"])

# Keep only shots dated after 1 July 2023.
# NOTE(review): this looks like the cut-off for the 2023/24 season (the
# recorded rows all carry season 2023) - confirm, and keep it in step with
# the season requested elsewhere in the notebook.
SHOTS_AFTER = pd.Timestamp("2023-07-01")

# .copy() makes the filtered result an independent frame, so assigning new
# columns to it later does not trigger SettingWithCopyWarning.
total_player_shots_df = total_player_shots_df.loc[
    total_player_shots_df["date"] > SHOTS_AFTER
].copy()

In [49]:
# Preview the date-filtered shot table (one row per shot).
total_player_shots_df

Unnamed: 0,id,minute,result,X,Y,xG,player,h_a,player_id,situation,season,shotType,match_id,h_team,a_team,h_goals,a_goals,date,player_assisted,lastAction
985,532588,11,ShotOnPost,0.8190000152587891,0.5279999923706055,0.06408040970563889,Mohamed Salah,a,1250,OpenPlay,2023,RightFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,Cody Gakpo,Pass
986,532592,25,BlockedShot,0.850999984741211,0.445,0.10087071359157562,Mohamed Salah,a,1250,OpenPlay,2023,LeftFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,Luis Díaz,Pass
987,532594,40,MissedShots,0.8590000152587891,0.315,0.04292548820376396,Mohamed Salah,a,1250,OpenPlay,2023,LeftFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,,
988,533205,33,MissedShots,0.8190000152587891,0.5720000076293945,0.05514945462346077,Mohamed Salah,h,1250,OpenPlay,2023,LeftFoot,22288,Liverpool,Bournemouth,3,1,2023-08-19 14:00:00,Diogo Jota,Pass
989,533207,35,Goal,0.9569999694824218,0.5259999847412109,0.6668094396591187,Mohamed Salah,h,1250,SetPiece,2023,LeftFoot,22288,Liverpool,Bournemouth,3,1,2023-08-19 14:00:00,,Rebound
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50328,554063,33,MissedShots,0.8619999694824219,0.3,0.04014270380139351,Murillo,h,12123,OpenPlay,2023,LeftFoot,22063,Nottingham Forest,Tottenham,0,2,2023-12-15 20:00:00,,
50329,557107,26,MissedShots,0.919000015258789,0.4490000152587891,0.07850664854049683,Murillo,h,12123,FromCorner,2023,Head,22091,Nottingham Forest,Manchester United,2,1,2023-12-30 17:30:00,Morgan Gibbs-White,Cross
50330,544109,76,MissedShots,0.6859999847412109,0.5079999923706054,0.0074213468469679356,Matheus França,a,12152,OpenPlay,2023,RightFoot,21981,Newcastle United,Crystal Palace,4,0,2023-10-21 14:00:00,Cheick Oumar Doucoure,
50331,545174,101,MissedShots,0.9330000305175781,0.6270000076293946,0.0706448033452034,Matheus França,h,12152,OpenPlay,2023,LeftFoot,21990,Crystal Palace,Tottenham,1,2,2023-10-27 19:00:00,Jean-Philippe Mateta,HeadPass


In [71]:
async def league_results(league="epl", season=2024, options=None):
    """Fetch the match results of one league season from Understat.

    Parameters
    ----------
    league : str
        League name handed to ``Understat.get_league_results``.
        Defaults to ``"epl"``.
    season : int or str
        Season handed to ``Understat.get_league_results``.
        Defaults to ``2024``.
    options : dict, optional
        Passed as the third positional argument of ``get_league_results``.
        An empty dict is sent when omitted.

    Returns
    -------
    str
        The results serialised with ``json.dumps``.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        fixtures = await understat.get_league_results(
            league,
            season,
            {} if options is None else options,
        )
    return json.dumps(fixtures)


# Patch asyncio so asyncio.run() works inside the notebook's running event
# loop even when the players() cell has not been executed in this kernel.
nest_asyncio.apply()

# Decode the JSON text back into a list of match records.
matches = json.loads(asyncio.run(league_results()))

In [72]:
# Build the match table directly from the list of match records. pandas
# matches each value to its column by key, and an empty list gives an empty
# frame instead of an IndexError on matches[0].
matches_df = pd.DataFrame(matches)

# Column names, kept under this name in case later cells refer to it.
cols3 = list(matches_df.columns)

In [73]:
def match_parser(row):
    """Flatten one match record into scalar columns and outcome flags.

    Reads the nested ``h``, ``a``, ``goals`` and ``xG`` mappings of ``row``
    and writes these entries back onto it: ``home_id``, ``away_id``,
    ``home_team``, ``away_team``, ``home_goals``, ``away_goals``,
    ``home_xg``, ``away_xg``, ``is_draw``, ``home_win``, ``away_win``.

    Parameters
    ----------
    row : mapping-like
        Anything supporting item get/set, e.g. a DataFrame row or a dict.

    Returns
    -------
    The same ``row`` object, modified in place.

    Raises
    ------
    TypeError, ValueError
        If a goal count cannot be converted with ``int()``.
    """
    home, away = row["h"], row["a"]
    row["home_id"] = home["id"]
    row["away_id"] = away["id"]
    row["home_team"] = home["title"]
    row["away_team"] = away["title"]

    # The copied goal and xG values keep their original type (strings in the
    # Understat payload, e.g. {'h': '0', 'a': '3'}).
    row["home_goals"] = row["goals"]["h"]
    row["away_goals"] = row["goals"]["a"]
    row["home_xg"] = row["xG"]["h"]
    row["away_xg"] = row["xG"]["a"]

    # Compare the scores as integers: as strings, '10' > '9' is False, so a
    # double-digit score would be classified as a loss for the higher scorer.
    home_scored = int(row["home_goals"])
    away_scored = int(row["away_goals"])
    row["is_draw"] = home_scored == away_scored
    row["home_win"] = home_scored > away_scored
    row["away_win"] = home_scored < away_scored
    return row

# Run match_parser over every row (axis=1) and keep the expanded rows.
matches_df = matches_df.apply(match_parser, axis=1)

In [74]:
# Preview the match table, including the columns added by match_parser.
matches_df

Unnamed: 0,id,isResult,h,a,goals,xG,datetime,forecast,home_id,away_id,home_team,away_team,home_goals,away_goals,home_xg,away_xg,is_draw,home_win,away_win
0,22275,True,"{'id': '92', 'title': 'Burnley', 'short_title'...","{'id': '88', 'title': 'Manchester City', 'shor...","{'h': '0', 'a': '3'}","{'h': '0.311032', 'a': '2.40074'}",2023-08-11 19:00:00,"{'w': '0.0177', 'd': '0.0854', 'l': '0.8969'}",92,88,Burnley,Manchester City,0,3,0.311032,2.40074,False,False,True
1,22276,True,"{'id': '83', 'title': 'Arsenal', 'short_title'...","{'id': '249', 'title': 'Nottingham Forest', 's...","{'h': '2', 'a': '1'}","{'h': '0.84262', 'a': '0.966305'}",2023-08-12 11:30:00,"{'w': '0.2797', 'd': '0.3363', 'l': '0.384'}",83,249,Arsenal,Nottingham Forest,2,1,0.84262,0.966305,False,True,False
2,22277,True,"{'id': '73', 'title': 'Bournemouth', 'short_ti...","{'id': '81', 'title': 'West Ham', 'short_title...","{'h': '1', 'a': '1'}","{'h': '1.51025', 'a': '1.4834'}",2023-08-12 14:00:00,"{'w': '0.3559', 'd': '0.3169', 'l': '0.3272'}",73,81,Bournemouth,West Ham,1,1,1.51025,1.4834,True,False,False
3,22278,True,"{'id': '220', 'title': 'Brighton', 'short_titl...","{'id': '256', 'title': 'Luton', 'short_title':...","{'h': '4', 'a': '1'}","{'h': '4.36748', 'a': '1.88594'}",2023-08-12 14:00:00,"{'w': '0.879', 'd': '0.0876', 'l': '0.0334'}",220,256,Brighton,Luton,4,1,4.36748,1.88594,False,True,False
4,22279,True,"{'id': '72', 'title': 'Everton', 'short_title'...","{'id': '228', 'title': 'Fulham', 'short_title'...","{'h': '0', 'a': '1'}","{'h': '2.59001', 'a': '1.58144'}",2023-08-12 14:00:00,"{'w': '0.6371', 'd': '0.1989', 'l': '0.164'}",72,228,Everton,Fulham,0,1,2.59001,1.58144,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,22091,True,"{'id': '249', 'title': 'Nottingham Forest', 's...","{'id': '89', 'title': 'Manchester United', 'sh...","{'h': '2', 'a': '1'}","{'h': '0.744348', 'a': '0.651905'}",2023-12-30 17:30:00,"{'w': '0.3432', 'd': '0.3817', 'l': '0.2751'}",249,89,Nottingham Forest,Manchester United,2,1,0.744348,0.651905,False,True,False
194,22087,True,"{'id': '228', 'title': 'Fulham', 'short_title'...","{'id': '83', 'title': 'Arsenal', 'short_title'...","{'h': '2', 'a': '1'}","{'h': '1.52829', 'a': '1.6591'}",2023-12-31 14:00:00,"{'w': '0.3212', 'd': '0.286', 'l': '0.3928'}",228,83,Fulham,Arsenal,2,1,1.52829,1.6591,False,True,False
195,22092,True,"{'id': '82', 'title': 'Tottenham', 'short_titl...","{'id': '73', 'title': 'Bournemouth', 'short_ti...","{'h': '3', 'a': '1'}","{'h': '2.15027', 'a': '1.58229'}",2023-12-31 14:00:00,"{'w': '0.5177', 'd': '0.2287', 'l': '0.2536'}",82,73,Tottenham,Bournemouth,3,1,2.15027,1.58229,False,True,False
196,22088,True,"{'id': '87', 'title': 'Liverpool', 'short_titl...","{'id': '86', 'title': 'Newcastle United', 'sho...","{'h': '4', 'a': '2'}","{'h': '6.67151', 'a': '0.910974'}",2024-01-01 20:00:00,"{'w': '0.9981', 'd': '0.0015', 'l': '0.0004'}",87,86,Liverpool,Newcastle United,4,2,6.67151,0.910974,False,True,False
