In [1]:
import asyncio
import json
try:
   import cPickle as pickle
except:
   import pickle
import pandas as pd
import aiohttp
import requests

from understat import Understat

In [17]:
def update_id_map():
    
    # FPL team codes
    LEI = 9
    LEE = 10
    
    with open('id_map', 'rb') as input_file:
        id_map = pickle.load(input_file)
    new_df = fpl_df[~fpl_df['id'].isin(id_map.keys())]
    new_u_df = df[~df['id'].isin(id_map.values())]
    
    if new_u_df.shape[0] > 0 and new_df.shape[0] == new_u_df.shape[0]:
        lei_lee_swap = dict(enumerate([LEE, LEI], LEI)) # Mapping to swap LEE AND LEI
        new_df['team'] = new_df['team'].map(lei_lee_swap)
        new_df.sort_values(['team', 'first_name', 'second_name'], inplace=True)
        new_u_df.sort_values(['team_title', 'player_name'], inplace=True)
        new_ids = {k:v for k, v in zip(new_df['id'], new_u_df['id'])}
        new_id_map = {**id_map, **new_ids}
        with open('id_map', 'wb') as output_file:
            pickle.dump(new_id_map, output_file)

In [19]:
async def main():
    async with aiohttp.ClientSession() as session:
        understat = Understat(session)
        players = await understat.get_league_players(
            "epl", 2020,
        )
        return json.dumps(players)

In [4]:
df = pd.read_json(await main())

fpl_data = requests.get(url='https://fantasy.premierleague.com/api/bootstrap-static/').json()
fpl_df = pd.DataFrame(fpl_data['elements'])
fpl_df = fpl_df[['first_name', 'second_name', 'web_name', 'element_type', 
                 'ep_next', 'ep_this', 'event_points', 'form', 'id',
                 'now_cost', 'points_per_game', 'selected_by_percent',
                 'team', 'total_points', 'value_form', 'value_season',
                 'minutes', 'goals_scored', 'assists', 'clean_sheets',
                 'goals_conceded', 'yellow_cards', 'red_cards','saves',
                 'bonus', 'bps', 'influence', 'creativity', 'threat',
                 'ict_index']]
fpl_df = fpl_df[fpl_df['minutes'] > 0]

In [5]:
update_id_map()
with open('id_map', 'rb') as file:
    id_map = pickle.load(file)
fpl_df = fpl_df.assign(understat_id=fpl_df['id'].map(id_map))
combined_df = df.join(fpl_df.set_index('understat_id'), on='id', rsuffix='_fpl')
combined_df.columns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['team'] = new_df['team'].map(lei_lee_swap)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.sort_values(['team', 'first_name', 'second_name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_u_df.sort_values(['team_title', 'player_name'], inplace=True)


Index(['id', 'player_name', 'games', 'time', 'goals', 'xG', 'assists', 'xA',
       'shots', 'key_passes', 'yellow_cards', 'red_cards', 'position',
       'team_title', 'npg', 'npxG', 'xGChain', 'xGBuildup', 'first_name',
       'second_name', 'web_name', 'element_type', 'ep_next', 'ep_this',
       'event_points', 'form', 'id_fpl', 'now_cost', 'points_per_game',
       'selected_by_percent', 'team', 'total_points', 'value_form',
       'value_season', 'minutes', 'goals_scored', 'assists_fpl',
       'clean_sheets', 'goals_conceded', 'yellow_cards_fpl', 'red_cards_fpl',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index'],
      dtype='object')

In [6]:
combined_df = combined_df[['id', 'id_fpl', 'player_name', 'web_name', 'team_title',
                     'team', 'element_type', 'games', 'minutes', 'goals_scored',
                     'xG', 'assists_fpl', 'xA', 'shots', 'key_passes', 'npg',
                     'npxG', 'xGChain', 'xGBuildup', 'ep_next',
                     'ep_this', 'event_points', 'form', 'now_cost',
                     'points_per_game', 'selected_by_percent', 'total_points',
                     'value_form', 'value_season', 'clean_sheets',
                     'yellow_cards_fpl', 'red_cards_fpl', 'saves', 'bonus',
                     'bps', 'influence', 'creativity', 'threat', 'ict_index']]

In [7]:
current_column_names = combined_df.columns
new_column_names = ['player_id', 'player_id_fpl', 'player_name', 'web_name',
               'Team', 'team_id', 'Position', 'Games', 'Minutes',
               'Goals', 'Expected Goals', 'Assists', 'Expected Assists',
               'Shots', 'Key Passes', 'Non-Penalty Goals',
               'Expected Goals (Non-Penalty)', 'Expected Goals (Chain)',
               'Expected Goals (Build-Up)', 'Expected Points (Next GW)',
               'Expected Points (This GW)', 'Event Points', 'Form', 'Cost',
               'PPG', 'Selected By (%)', 'Total Points', 'Value (Form)',
               'Value (Season)', 'Clean Sheets', 'Yellow Cards', 'Red Cards',
               'Saves', 'Bonus', 'BPS', 'Influence', 'Creativity', 'Threat',
               'ICT Index']
column_map = dict(zip(current_column_names, new_column_names))
combined_df = combined_df.rename(columns=column_map)

In [12]:
combined_df[combined_df['Team'].str.contains(',')]

Unnamed: 0,player_id,player_id_fpl,player_name,web_name,Team,team_id,Position,Games,Minutes,Goals,...,Clean Sheets,Yellow Cards,Red Cards,Saves,Bonus,BPS,Influence,Creativity,Threat,ICT Index
32,592,106,Ross Barkley,Barkley,"Aston Villa,Chelsea",2,3,8,494,2,...,2,0,0,0,4,116,148.8,223.9,205.0,57.8
55,503,569,Theo Walcott,Cavani,"Everton,Southampton",13,4,5,82,1,...,0,1,0,0,0,29,34.2,2.4,83.0,11.9
63,688,115,Ruben Loftus-Cheek,Loftus-Cheek,"Chelsea,Fulham",8,3,5,252,1,...,1,1,0,0,0,53,54.4,37.4,76.0,16.7


In [8]:
combined_df.to_csv('combined_df.csv', index=False)

In [27]:
combined_df.loc[:, 'Games':].columns

Index(['Games', 'Minutes', 'Goals', 'Expected Goals', 'Assists',
       'Expected Assists', 'Shots', 'Key Passes', 'Non-Penalty Goals',
       'Expected Goals (Non-Penalty)', 'Expected Goals (Chain)',
       'Expected Goals (Build-Up)', 'Expected Points (Next GW)',
       'Expected Points (This GW)', 'Event Points', 'Form', 'Cost', 'PPG',
       'Selected By (%)', 'Total Points', 'Value (Form)', 'Value (Season)',
       'Clean Sheets', 'Yellow Cards', 'Red Cards', 'Saves', 'Bonus', 'BPS',
       'Influence', 'Creativity', 'Threat', 'ICT Index'],
      dtype='object')

In [35]:
combined_df.loc[:, 'web_name': 'position']

Unnamed: 0,web_name,team_title,team_id,position
0,Vardy,Leicester,9,4
1,Calvert-Lewin,Everton,7,4
2,Son,Tottenham,17,3
3,Zaha,Crystal Palace,6,3
4,Bamford,Leeds,10,4
...,...,...,...,...
332,Cash,Aston Villa,2,2
333,Watkins,Aston Villa,2,4
334,Delap,Manchester City,12,4
335,Townsend,West Bromwich Albion,18,2


In [18]:
update_id_map()

None


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  print(new_df.sort_values(['team', 'first_name', 'second_name'], inplace=True))
