In [1]:
import asyncio
import json
from pathlib import Path

import aiohttp
import matplotlib.pyplot as plt
import nest_asyncio
import numpy as np
import pandas as pd
from understat import Understat

In [2]:
def total_players(league="epl", season=2024):
    """Download the per-player season statistics of a league from Understat.

    Args:
        league: League identifier forwarded to
            ``Understat.get_league_players``. Defaults to ``"epl"``.
        season: Season forwarded to ``Understat.get_league_players``.
            Defaults to ``2024``.

    Returns:
        The player records as returned by ``Understat.get_league_players``
        (the caller indexes the result and reads ``.keys()`` on each element,
        so it is used as a sequence of dict-like records).
    """
    # Patch asyncio before calling asyncio.run() below.
    # NOTE(review): presumably required because the notebook kernel already
    # runs an event loop; the later cells call asyncio.run() without applying
    # the patch themselves, so they depend on this call having happened.
    nest_asyncio.apply()

    async def all_understat_players():
        async with aiohttp.ClientSession() as session:
            # The response is returned as-is: serialising it to JSON and
            # parsing it straight back adds nothing for JSON-native data.
            return await Understat(session).get_league_players(league, season)

    return asyncio.run(all_understat_players())

In [3]:
# Fetch the raw player records and load them into a DataFrame.
# The frame is built from the records themselves so every value is matched to
# its column by key; copying dict.values() by position would misalign columns
# if any record listed its keys in a different order than the first one.
players = total_players()
players_df = pd.DataFrame(players)

In [4]:
# Show the players table as the cell output (last expression of the cell).
players_df

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,1250,Mohamed Salah,20,1768,14,13.610325902700424,8,9.57847910746932,65,47,2,0,F,Liverpool,10,9.043387070298195,19.68034301698208,5.720102474093437
1,8260,Erling Haaland,16,1322,14,16.90462365746498,4,3.2472310978919268,60,16,1,0,F S,Manchester City,11,13.859948486089706,16.391588754951954,1.3705047406256199
2,453,Son Heung-Min,20,1739,12,7.47495724260807,5,6.843130592256784,52,37,1,0,F M,Tottenham,11,6.713788405060768,16.887082293629646,6.178518561646342
3,1679,Dominic Solanke,20,1793,12,12.433437447994947,1,0.9976950995624065,67,14,1,0,F,Bournemouth,11,11.672268595546484,12.4195606559515,1.9994132556021214
4,1776,Jarrod Bowen,20,1800,11,8.646546997129917,2,3.144163405522704,51,14,1,0,F M,West Ham,11,8.646546997129917,10.23329196497798,2.032880038022995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,12215,Michael Ndiweni,1,1,0,0,0,0,0,0,0,0,S,Newcastle United,0,0,0,0
516,12234,Tawanda Chirewa,1,1,0,0,0,0,0,0,0,0,S,Wolverhampton Wanderers,0,0,0,0
517,12251,Sydie Peck,1,6,0,0,0,0,0,0,0,0,S,Sheffield United,0,0,0,0
518,12275,Willy Kambwala,2,88,0,0,0,0,0,0,0,0,D S,Manchester United,0,0,0.07591637223958969,0.07591637223958969


In [16]:
async def league_table(league="EPL", season="2024"):
    """Download a league table from Understat as a DataFrame.

    Args:
        league: League identifier forwarded to
            ``Understat.get_league_table``. Defaults to ``"EPL"``.
        season: Season forwarded to ``Understat.get_league_table``.
            Defaults to ``"2024"``.

    Returns:
        A DataFrame whose column names come from the first row of the
        response and whose data rows are the remaining rows.
    """
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        table = await understat.get_league_table(league, season)
    # Row 0 carries the column headers; everything after it is data.
    return pd.DataFrame(table[1:], columns=table[0])


league_table_df = asyncio.run(league_table())
# Number the rows 1..N from the actual row count instead of assuming exactly
# 20 teams, which raised a length-mismatch error for any other table size.
# NOTE(review): this labels rows in the order Understat returned them; it
# assumes that order is the league standing — confirm.
league_table_df["Position"] = np.arange(1, len(league_table_df) + 1)


In [17]:
# Show the league table, including the added Position column, as the cell output.
league_table_df

Unnamed: 0,Team,M,W,D,L,G,GA,PTS,xG,NPxG,xGA,NPxGA,NPxGD,PPDA,OPPDA,DC,ODC,xPTS,Position
0,Liverpool,22,15,6,1,51,19,51,51.51,45.89,26.19,25.43,20.46,8.18,16.26,267,133,43.67,1
1,Manchester City,21,14,4,3,51,24,46,47.19,43.38,21.23,19.71,23.68,11.64,20.98,251,92,44.73,2
2,Arsenal,22,14,4,4,44,21,46,45.47,40.91,20.63,19.11,21.79,9.11,17.34,270,93,44.13,3
3,Aston Villa,22,13,4,5,44,30,43,41.31,38.27,32.18,32.18,6.09,12.26,10.3,225,141,37.35,4
4,Tottenham,22,13,4,5,47,33,43,42.35,41.59,39.18,35.38,6.22,8.1,11.14,288,164,33.59,5
5,West Ham,21,10,5,6,35,32,35,31.83,29.55,39.1,35.29,-5.75,17.08,9.81,95,227,25.13,6
6,Newcastle United,22,10,2,10,44,33,32,48.18,45.13,37.92,35.51,9.62,10.25,11.54,191,166,37.66,7
7,Brighton,22,8,8,6,38,37,32,39.04,35.99,36.55,32.74,3.25,9.07,17.13,237,188,32.13,8
8,Manchester United,21,10,2,9,24,29,32,30.22,27.92,36.87,35.35,-7.43,12.05,13.21,184,167,24.79,9
9,Chelsea,22,9,4,9,36,35,31,44.49,38.4,34.93,31.87,6.52,9.2,13.79,228,146,36.85,10


In [18]:
players_id = list(players_df["id"])


async def get_player_shots(player_id, session=None):
    """Fetch the shots recorded for one player from Understat.

    Args:
        player_id: Player id forwarded to ``Understat.get_player_shots``.
        session: Optional open ``aiohttp.ClientSession`` to reuse. When it is
            omitted, a temporary session is opened and closed for this call.

    Returns:
        The shots serialised as a JSON string.
    """
    if session is not None:
        shots = await Understat(session).get_player_shots(player_id)
        return json.dumps(shots)
    async with aiohttp.ClientSession() as own_session:
        shots = await Understat(own_session).get_player_shots(player_id)
        return json.dumps(shots)


async def get_all_player_shots(player_ids):
    """Fetch the shots of every player in ``player_ids`` over one session.

    Requests are awaited one after another, so the result keeps the order of
    ``player_ids``.

    Returns:
        A list with one decoded shots payload per player id.
    """
    # One session (and one event loop) for the whole batch, rather than a new
    # session and a new asyncio.run() call for every single player.
    async with aiohttp.ClientSession() as session:
        return [
            json.loads(await get_player_shots(player_id, session))
            for player_id in player_ids
        ]


# One entry per player; the entries are flattened into a single list later.
total_shots = asyncio.run(get_all_player_shots(players_id))

In [19]:
def flatten_extend(matrix):
    """Concatenate the items of every row of ``matrix`` into one new list.

    Args:
        matrix: An iterable of iterables.

    Returns:
        A list holding the items of each row, in row order.
    """
    return [item for row in matrix for item in row]

# Collapse the per-player entries of total_shots into one flat list.
# NOTE(review): this rebinds total_shots to its own flattened form, so the
# cell is not idempotent — running it again without re-running the fetch cell
# feeds the already-flat list back into flatten_extend.
total_shots = flatten_extend(total_shots)

In [20]:
# Load the flat list of shot records into a DataFrame.
# Building from the records matches each value to its column by key, instead
# of trusting that every record lists its values in the first record's order.
total_player_shots_df = pd.DataFrame(total_shots)

In [21]:
# Parse the "date" column into datetimes, then keep only the shots dated
# after 2023-07-01.
# NOTE(review): the other cells request season 2024 while this cutoff is in
# mid-2023 — confirm the two are meant to describe the same season.
shot_dates = pd.to_datetime(total_player_shots_df["date"])
total_player_shots_df = total_player_shots_df.assign(date=shot_dates).loc[
    lambda shots: shots["date"] > "2023-07-01"
]

In [22]:
# Show the date-filtered shots table as the cell output.
total_player_shots_df

Unnamed: 0,id,minute,result,X,Y,xG,player,h_a,player_id,situation,season,shotType,match_id,h_team,a_team,h_goals,a_goals,date,player_assisted,lastAction
985,532588,11,ShotOnPost,0.8190000152587891,0.5279999923706055,0.06408040970563889,Mohamed Salah,a,1250,OpenPlay,2023,RightFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,Cody Gakpo,Pass
986,532592,25,BlockedShot,0.850999984741211,0.445,0.10087071359157562,Mohamed Salah,a,1250,OpenPlay,2023,LeftFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,Luis Díaz,Pass
987,532594,40,MissedShots,0.8590000152587891,0.315,0.04292548820376396,Mohamed Salah,a,1250,OpenPlay,2023,LeftFoot,22283,Chelsea,Liverpool,1,1,2023-08-13 15:30:00,,
988,533205,33,MissedShots,0.8190000152587891,0.5720000076293945,0.05514945462346077,Mohamed Salah,h,1250,OpenPlay,2023,LeftFoot,22288,Liverpool,Bournemouth,3,1,2023-08-19 14:00:00,Diogo Jota,Pass
989,533207,35,Goal,0.9569999694824218,0.5259999847412109,0.6668094396591187,Mohamed Salah,h,1250,SetPiece,2023,LeftFoot,22288,Liverpool,Bournemouth,3,1,2023-08-19 14:00:00,,Rebound
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51791,556621,96,BlockedShot,0.8019999694824219,0.655,0.021309370175004005,Matheus França,a,12152,OpenPlay,2023,RightFoot,22080,Chelsea,Crystal Palace,2,1,2023-12-27 19:30:00,Michael Olise,Chipped
51792,557870,40,BlockedShot,0.9119999694824219,0.3870000076293945,0.09613598138093948,Jordan Clark,a,12187,OpenPlay,2023,RightFoot,22096,Burnley,Luton,1,1,2024-01-12 19:45:00,Chiedozie Ogbene,Pass
51793,557877,57,MissedShots,0.8209999847412109,0.32299999237060545,0.032191190868616104,Jordan Clark,a,12187,OpenPlay,2023,RightFoot,22096,Burnley,Luton,1,1,2024-01-12 19:45:00,Ross Barkley,Chipped
51794,560596,22,SavedShot,0.9119999694824219,0.40299999237060546,0.12998487055301666,Jordan Clark,h,12187,OpenPlay,2023,LeftFoot,22107,Luton,Brighton,4,0,2024-01-30 19:45:00,Ross Barkley,Pass


In [23]:
async def league_results():
    """Download the "epl" 2024 results from Understat.

    An empty options dict is passed as the third argument of
    ``Understat.get_league_results``.

    Returns:
        The results serialised as a JSON string.
    """
    async with aiohttp.ClientSession() as http:
        client = Understat(http)
        results = await client.get_league_results("epl", 2024, {})
    return json.dumps(results)


# Run the download and decode the JSON text back into Python objects.
matches = json.loads(asyncio.run(league_results()))

In [24]:
# Load the match records into a DataFrame, one row per match.
# Building from the records matches each value to its column by key, instead
# of trusting that every record lists its values in the first record's order.
# Nested values (the "h", "a", "goals", "xG" entries) stay as objects in their
# cells and are unpacked by match_parser.
matches_df = pd.DataFrame(matches)

In [25]:
def match_parser(row):
    """Unpack the nested fields of one match record and add result flags.

    Reads the nested ``"h"``, ``"a"``, ``"goals"`` and ``"xG"`` entries of
    ``row`` and writes them back as flat entries (``home_id``, ``away_id``,
    ``home_team``, ``away_team``, ``home_goals``, ``away_goals``, ``home_xg``,
    ``away_xg``), then adds the booleans ``is_draw``, ``home_win`` and
    ``away_win``.

    Args:
        row: A mutable mapping (a DataFrame row or a dict) holding the nested
            entries above. It is modified in place.

    Returns:
        The same ``row`` object, with the new entries added.

    Raises:
        ValueError: If a goals value cannot be converted to ``int``.
    """
    home, away = row["h"], row["a"]
    row["home_id"] = home["id"]
    row["away_id"] = away["id"]
    row["home_team"] = home["title"]
    row["away_team"] = away["title"]
    row["home_goals"] = row["goals"]["h"]
    row["away_goals"] = row["goals"]["a"]
    row["home_xg"] = row["xG"]["h"]
    row["away_xg"] = row["xG"]["a"]

    # The goals arrive as strings, so comparing them directly is lexicographic
    # ("10" sorts before "9"). Compare as integers; the stored columns keep
    # their original values.
    home_scored = int(row["home_goals"])
    away_scored = int(row["away_goals"])
    row["is_draw"] = home_scored == away_scored
    row["home_win"] = home_scored > away_scored
    row["away_win"] = home_scored < away_scored
    return row

# Run match_parser on every row (axis=1) to add the flattened columns.
matches_df = matches_df.apply(match_parser, axis=1)

In [26]:
# Show the matches table, including the columns added by match_parser, as the cell output.
matches_df

Unnamed: 0,id,isResult,h,a,goals,xG,datetime,forecast,home_id,away_id,home_team,away_team,home_goals,away_goals,home_xg,away_xg,is_draw,home_win,away_win
0,22275,True,"{'id': '92', 'title': 'Burnley', 'short_title'...","{'id': '88', 'title': 'Manchester City', 'shor...","{'h': '0', 'a': '3'}","{'h': '0.311032', 'a': '2.40074'}",2023-08-11 19:00:00,"{'w': '0.0177', 'd': '0.0854', 'l': '0.8969'}",92,88,Burnley,Manchester City,0,3,0.311032,2.40074,False,False,True
1,22276,True,"{'id': '83', 'title': 'Arsenal', 'short_title'...","{'id': '249', 'title': 'Nottingham Forest', 's...","{'h': '2', 'a': '1'}","{'h': '0.84262', 'a': '0.966305'}",2023-08-12 11:30:00,"{'w': '0.2797', 'd': '0.3363', 'l': '0.384'}",83,249,Arsenal,Nottingham Forest,2,1,0.84262,0.966305,False,True,False
2,22277,True,"{'id': '73', 'title': 'Bournemouth', 'short_ti...","{'id': '81', 'title': 'West Ham', 'short_title...","{'h': '1', 'a': '1'}","{'h': '1.51025', 'a': '1.4834'}",2023-08-12 14:00:00,"{'w': '0.3559', 'd': '0.3169', 'l': '0.3272'}",73,81,Bournemouth,West Ham,1,1,1.51025,1.4834,True,False,False
3,22278,True,"{'id': '220', 'title': 'Brighton', 'short_titl...","{'id': '256', 'title': 'Luton', 'short_title':...","{'h': '4', 'a': '1'}","{'h': '4.36748', 'a': '1.88594'}",2023-08-12 14:00:00,"{'w': '0.879', 'd': '0.0876', 'l': '0.0334'}",220,256,Brighton,Luton,4,1,4.36748,1.88594,False,True,False
4,22279,True,"{'id': '72', 'title': 'Everton', 'short_title'...","{'id': '228', 'title': 'Fulham', 'short_title'...","{'h': '0', 'a': '1'}","{'h': '2.59001', 'a': '1.58144'}",2023-08-12 14:00:00,"{'w': '0.6371', 'd': '0.1989', 'l': '0.164'}",72,228,Everton,Fulham,0,1,2.59001,1.58144,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,22112,True,"{'id': '78', 'title': 'Crystal Palace', 'short...","{'id': '238', 'title': 'Sheffield United', 'sh...","{'h': '3', 'a': '2'}","{'h': '1.1601', 'a': '0.768467'}",2024-01-30 20:00:00,"{'w': '0.4684', 'd': '0.3204', 'l': '0.2112'}",78,238,Crystal Palace,Sheffield United,3,2,1.1601,0.768467,False,True,False
212,22105,True,"{'id': '71', 'title': 'Aston Villa', 'short_ti...","{'id': '86', 'title': 'Newcastle United', 'sho...","{'h': '1', 'a': '3'}","{'h': '1.44485', 'a': '3.3224'}",2024-01-30 20:15:00,"{'w': '0.0747', 'd': '0.1228', 'l': '0.8025'}",71,86,Aston Villa,Newcastle United,1,3,1.44485,3.3224,False,False,True
213,22109,True,"{'id': '82', 'title': 'Tottenham', 'short_titl...","{'id': '244', 'title': 'Brentford', 'short_tit...","{'h': '3', 'a': '2'}","{'h': '3.36194', 'a': '1.59637'}",2024-01-31 19:30:00,"{'w': '0.7778', 'd': '0.1393', 'l': '0.0829'}",82,244,Tottenham,Brentford,3,2,3.36194,1.59637,False,True,False
214,22114,True,"{'id': '88', 'title': 'Manchester City', 'shor...","{'id': '92', 'title': 'Burnley', 'short_title'...","{'h': '3', 'a': '1'}","{'h': '1.84756', 'a': '1.49114'}",2024-01-31 19:30:00,"{'w': '0.45', 'd': '0.2838', 'l': '0.2662'}",88,92,Manchester City,Burnley,3,1,1.84756,1.49114,False,True,False


### Output

In [27]:
# Write every table to data/ as CSV, without the index column.
# The directory is created first so the export also works on a fresh checkout
# where data/ does not exist yet.
OUTPUT_DIR = Path("data")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

players_df.to_csv(OUTPUT_DIR / "players.csv", index=False)
league_table_df.to_csv(OUTPUT_DIR / "league_table.csv", index=False)
total_player_shots_df.to_csv(OUTPUT_DIR / "player_shots.csv", index=False)
matches_df.to_csv(OUTPUT_DIR / "matches.csv", index=False)