In [19]:
from footbot.data.understat_data import (
    get_league_players_async, get_teams_async, get_league_results_async, get_match_players_async)
from footbot.data.utils import set_up_bigquery, run_query, get_safe_web_name
import pandas as pd
import asyncio


In [15]:
client = set_up_bigquery('../../secrets/service_account.json')

In [78]:
pd.set_option('display.max_rows', 1000)

# Exploring data

## Players

In [96]:
players_arr = []

for season in ['2016', '2017', '2018', '2019', '2020']:
    players = await get_league_players_async(season)
    for player in players:
        player['season'] = season
    players_arr.append(players)
    
players = [item for sublist in players_arr for item in sublist]

In [97]:
players[0]

{'id': '647',
 'player_name': 'Harry Kane',
 'games': '30',
 'time': '2556',
 'goals': '29',
 'xG': '19.82009919732809',
 'assists': '7',
 'xA': '5.5538915153592825',
 'shots': '110',
 'key_passes': '41',
 'yellow_cards': '3',
 'red_cards': '0',
 'position': 'F M S',
 'team_title': 'Tottenham',
 'npg': '24',
 'npxG': '15.253085978329182',
 'xGChain': '21.94719305820763',
 'xGBuildup': '4.12599990144372',
 'season': '2016'}

## Teams

In [17]:
teams = await get_teams_async(2020)

In [19]:
teams[0].keys()

dict_keys(['id', 'title', 'history'])

In [20]:
teams[0]['history'][0]

{'h_a': 'h',
 'xG': 0.80527,
 'xGA': 0.849709,
 'npxG': 0.80527,
 'npxGA': 0.0885404,
 'ppda': {'att': 89, 'def': 20},
 'ppda_allowed': {'att': 247, 'def': 14},
 'deep': 17,
 'deep_allowed': 2,
 'scored': 1,
 'missed': 0,
 'xpts': 1.1601,
 'result': 'w',
 'date': '2020-09-21 17:00:00',
 'wins': 1,
 'draws': 0,
 'loses': 0,
 'pts': 3,
 'npxGD': 0.7167296000000001}

## Results

In [24]:
results = await get_league_results_async(2020)

In [26]:
results[0]

{'id': '14086',
 'isResult': True,
 'h': {'id': '228', 'title': 'Fulham', 'short_title': 'FLH'},
 'a': {'id': '83', 'title': 'Arsenal', 'short_title': 'ARS'},
 'goals': {'h': '0', 'a': '3'},
 'xG': {'h': '0.126327', 'a': '2.16287'},
 'datetime': '2020-09-12 11:30:00',
 'forecast': {'w': '0.0037', 'd': '0.0476', 'l': '0.9487'}}

## Player results

In [2]:
player_results = await get_match_players_async(14086)

In [4]:
player_results.keys()

dict_keys(['h', 'a'])

In [5]:
player_results['a']

{'414514': {'id': '414514',
  'goals': '0',
  'own_goals': '0',
  'shots': '0',
  'xG': '0',
  'time': '90',
  'player_id': '181',
  'team_id': '83',
  'position': 'GK',
  'player': 'Bernd Leno',
  'h_a': 'a',
  'yellow_card': '0',
  'red_card': '0',
  'roster_in': '0',
  'roster_out': '0',
  'key_passes': '0',
  'assists': '0',
  'xA': '0',
  'xGChain': '0.2652241289615631',
  'xGBuildup': '0.2652241289615631',
  'positionOrder': '1'},
 '414516': {'id': '414516',
  'goals': '0',
  'own_goals': '0',
  'shots': '0',
  'xG': '0',
  'time': '90',
  'player_id': '8089',
  'team_id': '83',
  'position': 'DC',
  'player': 'Kieran Tierney',
  'h_a': 'a',
  'yellow_card': '0',
  'red_card': '0',
  'roster_in': '0',
  'roster_out': '0',
  'key_passes': '0',
  'assists': '0',
  'xA': '0',
  'xGChain': '0.25259533524513245',
  'xGBuildup': '0.25259533524513245',
  'positionOrder': '3'},
 '414517': {'id': '414517',
  'goals': '1',
  'own_goals': '0',
  'shots': '1',
  'xG': '0.5060973167419434',
 

In [10]:
player_results['a']['414524']

{'id': '414524',
 'goals': '1',
 'own_goals': '0',
 'shots': '3',
 'xG': '0.28258055448532104',
 'time': '90',
 'player_id': '318',
 'team_id': '83',
 'position': 'FWL',
 'player': 'Pierre-Emerick Aubameyang',
 'h_a': 'a',
 'yellow_card': '1',
 'red_card': '0',
 'roster_in': '0',
 'roster_out': '0',
 'key_passes': '1',
 'assists': '0',
 'xA': '0.06435254961252213',
 'xGChain': '0.3469330966472626',
 'xGBuildup': '0.08170896768569946',
 'positionOrder': '16'}

In [12]:
pd.DataFrame(player_results['a'].values())

Unnamed: 0,id,goals,own_goals,shots,xG,time,player_id,team_id,position,player,...,yellow_card,red_card,roster_in,roster_out,key_passes,assists,xA,xGChain,xGBuildup,positionOrder
0,414514,0,0,0,0.0,90,181,83,GK,Bernd Leno,...,0,0,0,0,0,0,0.0,0.2652241289615631,0.2652241289615631,1
1,414516,0,0,0,0.0,90,8089,83,DC,Kieran Tierney,...,0,0,0,0,0,0,0.0,0.2525953352451324,0.2525953352451324,3
2,414517,1,0,1,0.5060973167419434,90,5613,83,DC,Gabriel,...,0,0,0,0,0,0,0.0,0.3469330966472626,0.3469330966472626,3
3,414515,0,0,0,0.0,90,1749,83,DC,Rob Holding,...,0,0,0,0,0,0,0.0,0.1760467439889907,0.1760467439889907,3
4,414518,0,0,0,0.0,90,492,83,DMR,Héctor Bellerín,...,1,0,0,0,1,0,0.1065338179469108,0.457085371017456,0.3505515456199646,5
5,414519,0,0,0,0.0,90,1750,83,DML,Ainsley Maitland-Niles,...,0,0,0,0,0,0,0.0,0.2525953352451324,0.2525953352451324,6
6,414520,0,0,1,0.1101522594690322,90,496,83,MC,Mohamed Elneny,...,0,0,0,0,0,0,0.0,0.457085371017456,0.3469330966472626,9
7,414521,0,0,4,0.2083705365657806,80,204,83,MC,Granit Xhaka,...,0,0,414525,0,1,0,0.0817089676856994,1.0529390573501587,0.9957149624824524,9
8,414522,0,0,2,0.7844159603118896,77,700,83,FWR,Willian,...,0,0,414527,0,3,2,0.6345871090888977,0.999333381652832,0.2562137842178345,14
9,414523,1,0,2,0.8282726407051086,88,3277,83,FW,Alexandre Lacazette,...,0,0,414526,0,1,0,0.1101522594690322,1.0691125392913818,0.2008715867996215,15


# Mapping to FPL API

In [105]:
elements_df = run_query(
    '''
    SELECT
    element_all,
    MIN(first_name) AS first_name,
    MIN(second_name) AS second_name,
    MIN(safe_web_name) AS safe_web_name
    FROM `footbot-001.fpl.elements_all`
    GROUP BY 1
    ''',
    client
)

In [106]:
elements_df['safe_element_name'] = elements_df.apply(
    lambda row: get_safe_web_name(row['first_name'] + ' ' + row['second_name']),
    axis = 1
)


elements_df

Unnamed: 0,element_all,first_name,second_name,safe_web_name,safe_element_name
0,1306,Takumi,Minamino,minamino,takumi minamino
1,1008,Michael,Obafemi,obafemi,michael obafemi
2,135,Ashley,Westwood,westwood,ashley westwood
3,695,John,Stones,stones,john stones
4,345,Davinson,Sánchez,sanchez,davinson sanchez
...,...,...,...,...,...
1420,1312,Taylor,Perry,perry,taylor perry
1421,861,Lewis,Richardson,richardson,lewis richardson
1422,1425,Ørjan,Nyland,nyland,orjan nyland
1423,1357,Tyler,Roberts,roberts,tyler roberts


In [108]:
understat_elements_df = pd.DataFrame(players)
understat_elements_df['safe_element_name'] = understat_elements_df['player_name'].apply(get_safe_web_name)

understat_elements_df = understat_elements_df[['safe_element_name', 'id']].drop_duplicates()

understat_elements_df

Unnamed: 0,safe_element_name,id
0,harry kane,647
1,romelu lukaku,594
2,alexis sanchez,498
3,sergio aguero,619
4,diego costa,802
...,...,...
2487,ruben dias,8961
2488,vladimir coufal,8965
2489,filip krovinovic,8966
2490,robert sanchez,9098


In [109]:
elements_df = elements_df.join(
    understat_elements_df.set_index('safe_element_name'), on='safe_element_name'
)

elements_df = elements_df.join(
    understat_elements_df.set_index('safe_element_name'), on='safe_web_name', rsuffix='_c'
)

In [112]:
elements_df[[
    'element_all',
    'safe_element_name',
    'id',
    'id_c'
]]

Unnamed: 0,element_all,safe_element_name,id,id_c
0,1306,takumi minamino,8239,
1,1008,michael obafemi,6504,
2,135,ashley westwood,669,
3,695,john stones,586,
4,345,davinson sanchez,6249,
...,...,...,...,...
1420,1312,taylor perry,,
1421,861,lewis richardson,,
1422,1425,orjan nyland,250,
1423,1357,tyler roberts,1014,


In [113]:
filled_ids = set(elements_df['id'].unique()).union(set(elements_df['id_c'].unique()))
print(len(filled_ids))

understat_elements_df[
    ~understat_elements_df['id'].isin(filled_ids)][['safe_element_name']].sort_values('safe_element_name')

1039


Unnamed: 0,safe_element_name
692,ahmed hegazy
447,aiden o&#039;neill
440,aleix garcia
2332,alex telles
261,alfred n&#039;diaye
216,ben chilwell
348,brad guzan
1579,bruno fernandes
1976,bruno jordao
1937,dani ceballos


In [85]:
elements_df[['safe_element_name', 'safe_web_name']].sort_values('safe_element_name')

Unnamed: 0,safe_element_name,safe_web_name
2684,aaron connolly,connolly
2212,aaron cresswell,cresswell
3128,aaron mooy,mooy
2461,aaron ramsdale,ramsdale
3088,aaron wan-bissaka,wan-bissaka
2347,abdoulaye doucoure,doucoure
2938,aboubakar kamara,kamara
2543,adam forshaw,forshaw
2472,adam lallana,lallana
2176,adam webster,webster


In [86]:
elements_df[elements_df['safe_element_name'].str.contains('alli')]

Unnamed: 0,element,safe_web_name,element_type,team,safe_team_name,first_name,second_name,season,element_all,safe_element_name,...,shots_c,key_passes_c,yellow_cards_c,red_cards_c,position_c,team_title_c,npg_c,npxG_c,xGChain_c,xGBuildup_c
1431,292,smalling,2,13,man utd,Chris,Smalling,2021,251,chris smalling,...,,,,,,,,,,
2308,394,alli,3,17,spurs,Bamidele,Alli,2021,149,bamidele alli,...,,,,,,,,,,
3129,80,mac allister,3,3,brighton,Alexis,Mac Allister,2021,72,alexis mac allister,...,,,,,,,,,,
