In [44]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from datetime import datetime

# pd.set_option('display.max_columns',None)

def get_team_data():
    current_year = datetime.now().year
    current_month = datetime.now().month

    if current_month < 8:
        current_year -= 1

    df_master = pd.DataFrame()

    for season in [current_year-1,current_year] :
        # Step 1: Fetch the website
        url = f'https://understat.com/league/EPL/{season}'
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Step 2: Find the specific <script> tag containing the JSON data
        scripts = soup.find_all('script')
        json_data = None

        # Step 3: Look for the script containing the teamsData
        for script in scripts:
            if 'teamsData' in script.text:
                # Extract the content of the script tag as text
                json_text = script.text
                # Step 4: Isolate the JSON data by splitting the string
                json_text = json_text.split("JSON.parse('")[1].split("')")[0]
                # Step 5: Decode the string by replacing escape characters
                json_text = json_text.encode('utf-8').decode('unicode_escape')
                # Step 6: Load the decoded text as a Python dictionary
                json_data = json.loads(json_text)
                break

        # Step 7: Extract match data for each team and identify the opponent
        if json_data:
            data = []
            
            # Step 8: Create a reverse lookup dictionary for matches based on date and home/away status
            matches_by_date = {}
            
            for team_id, team_info in json_data.items():
                team_name = team_info['title']
                for match in team_info['history']:
                    match_key = (match['date'], match['xG']+match['xGA'])  # Date and home/away key
                    if match_key not in matches_by_date:
                        matches_by_date[match_key] = []
                    matches_by_date[match_key].append({
                        'team': team_name,
                        'h_a': match['h_a'],
                        'date': match['date'],
                        'xG': match['xG'],
                        'xGA': match['xGA'],
                        'npxG': match['npxG'],
                        'npxGA': match['npxGA'],
                        'ppda_att': match['ppda']['att'],
                        'ppda_def': match['ppda']['def'],
                        'ppda_allowed_att': match['ppda_allowed']['att'],
                        'ppda_allowed_def': match['ppda_allowed']['def'],
                        'deep': match['deep'],
                        'deep_allowed': match['deep_allowed'],
                        'scored': match['scored'],
                        'missed': match['missed'],
                        'xpts': match['xpts'],
                        'result': match['result'],
                        'wins': match['wins'],
                        'draws': match['draws'],
                        'loses': match['loses'],
                        'pts': match['pts'],
                        'npxGD': match['npxGD'],  # home/away status
                    })

            # Step 9: Construct final data with opponents
            for match_key, matches in matches_by_date.items():
                if len(matches) == 2:
                    # We found both home and away team for the match
                    home_match = [m for m in matches if m['h_a'] == 'h'][0]
                    away_match = [m for m in matches if m['h_a'] == 'a'][0]
                    
                    # Add home team data with away team as opponent
                    data.append({
                        'team': home_match['team'],
                        'opponent': away_match['team'],
                        'date': match_key[0],
                        'season': season,
                        'result_h': home_match['result'],
                        'xG_h': home_match['xG'],
                        'xG_a': home_match['xGA'],
                        'npxG_h': home_match['npxG'],
                        'npxG_a': home_match['npxGA'],
                        'npxGD_h': home_match['npxGD'],
                        'ppda_att_h': home_match['ppda_att'],
                        'ppda_def_h': home_match['ppda_def'],
                        'ppda_att_a': away_match['ppda_att'],
                        'ppda_def_a': away_match['ppda_def'],
                        'deep_h': home_match['deep'],
                        'deep_a': home_match['deep_allowed'],
                        'scored_h': home_match['scored'],
                        'scored_a': away_match['scored'],
                        'xpts_h': home_match['xpts'],
                        'xpts_a': away_match['xpts'],
                    })
                

            # Step 10: Convert the list of match data into a pandas DataFrame
            df_season = pd.DataFrame(data)
        df_master = pd.concat([df_master,df_season])
    return df_master

In [None]:
pd.set_option('display.max_columns',None)

# Step 1: Fetch the website
url = f'https://understat.com/league/EPL/{2024}'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Step 2: Find the specific <script> tag containing the JSON data
scripts = soup.find_all('script')
json_data = None

# Step 3: Look for the script containing the teamsData
for script in scripts:
    if 'teamsData' in script.text:
        # Extract the content of the script tag as text
        json_text = script.text
        # Step 4: Isolate the JSON data by splitting the string
        json_text = json_text.split("JSON.parse('")[1].split("')")[0]
        # Step 5: Decode the string by replacing escape characters
        json_text = json_text.encode('utf-8').decode('unicode_escape')
        # Step 6: Load the decoded text as a Python dictionary
        json_data = json.loads(json_text)
        break

# Step 7: Extract match data for each team and identify the opponent
if json_data:
    data = []
    
    # Step 8: Create a reverse lookup dictionary for matches based on date and home/away status
    matches_by_date = {}
    
    for team_id, team_info in json_data.items():
        team_name = team_info['title']
        for match in team_info['history']:
            match_key = (match['date'], match['xG']+match['xGA'])  # Date and home/away key
            if match_key not in matches_by_date:
                matches_by_date[match_key] = []
            matches_by_date[match_key].append({
                'team': team_name,
                'h_a': match['h_a'],
                'date': match['date'],
                'xG': match['xG'],
                'xGA': match['xGA'],
                'npxG': match['npxG'],
                'npxGA': match['npxGA'],
                'ppda_att': match['ppda']['att'],
                'ppda_def': match['ppda']['def'],
                'ppda_allowed_att': match['ppda_allowed']['att'],
                'ppda_allowed_def': match['ppda_allowed']['def'],
                'deep': match['deep'],
                'deep_allowed': match['deep_allowed'],
                'scored': match['scored'],
                'missed': match['missed'],
                'xpts': match['xpts'],
                'result': match['result'],
                'wins': match['wins'],
                'draws': match['draws'],
                'loses': match['loses'],
                'pts': match['pts'],
                'npxGD': match['npxGD'],  # home/away status
            })

    # Step 9: Construct final data with opponents
    for match_key, matches in matches_by_date.items():
        if len(matches) == 2:
            # We found both home and away team for the match
            home_match = [m for m in matches if m['h_a'] == 'h'][0]
            away_match = [m for m in matches if m['h_a'] == 'a'][0]
            
            # Add home team data with away team as opponent
            data.append({
                'team': home_match['team'],
                'opponent': away_match['team'],
                'date': match_key[0],
                'season': season,
                'result_h': home_match['result'],
                'xG_h': home_match['xG'],
                'xG_a': home_match['xGA'],
                'npxG_h': home_match['npxG'],
                'npxG_a': home_match['npxGA'],
                'npxGD_h': home_match['npxGD'],
                'ppda_att_h': home_match['ppda_att'],
                'ppda_def_h': home_match['ppda_def'],
                'ppda_att_a': away_match['ppda_att'],
                'ppda_def_a': away_match['ppda_def'],
                'deep_h': home_match['deep'],
                'deep_a': home_match['deep_allowed'],
                'scored_h': home_match['scored'],
                'scored_a': away_match['scored'],
                'xpts_h': home_match['xpts'],
                'xpts_a': away_match['xpts'],
            })
        

    # Step 10: Convert the list of match data into a pandas DataFrame
    df_season = pd.DataFrame(data)
df_master = pd.concat([df_master,df_season])

In [54]:
# Step 1: Fetch the website
url = f'https://understat.com/league/EPL/2023'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Step 2: Find the specific <script> tag containing the JSON data
scripts = soup.find_all('script')
json_data = None

# Step 3: Look for the script containing the teamsData
for script in scripts:
    if 'datesData' in script.text:
        # Extract the content of the script tag as text
        json_text = script.text
        # Step 4: Isolate the JSON data by splitting the string
        json_text = json_text.split("JSON.parse('")[1].split("')")[0]
        # Step 5: Decode the string by replacing escape characters
        json_text = json_text.encode('utf-8').decode('unicode_escape')
        # Step 6: Load the decoded text as a Python dictionary
        json_data = json.loads(json_text)
        break

data = []
for match in json_data:
    data.append({
        'home_team': match['h']['title'],
        'away_team': match['a']['title'],
        'home_goals': match['goals']['h'],
        'away_goals': match['goals']['a'],
        'xG_home': match['xG']['h'],
        'xG_away': match['xG']['a'],
        'datetime': match['datetime']
    })

df = pd.DataFrame(data)
df

Unnamed: 0,home_team,away_team,home_goals,away_goals,xG_home,xG_away,datetime
0,Burnley,Manchester City,0,3,0.311032,2.40074,2023-08-11 19:00:00
1,Arsenal,Nottingham Forest,2,1,0.84262,0.966305,2023-08-12 11:30:00
2,Bournemouth,West Ham,1,1,1.51025,1.4834,2023-08-12 14:00:00
3,Brighton,Luton,4,1,4.36748,1.88594,2023-08-12 14:00:00
4,Everton,Fulham,0,1,2.59001,1.58144,2023-08-12 14:00:00
...,...,...,...,...,...,...,...
375,Crystal Palace,Aston Villa,5,0,2.23189,0.598846,2024-05-19 15:00:00
376,Liverpool,Wolverhampton Wanderers,2,0,5.65937,0.731254,2024-05-19 15:00:00
377,Luton,Fulham,2,4,2.06743,1.11598,2024-05-19 15:00:00
378,Manchester City,West Ham,3,1,2.31504,0.240159,2024-05-19 15:00:00


In [60]:
# Step 1: Fetch the website
url = f'https://understat.com/league/EPL/2023'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Step 2: Find the specific <script> tag containing the JSON data
scripts = soup.find_all('script')
json_data = None

# Step 3: Look for the script containing the teamsData
for script in scripts:
    if 'playersData' in script.text:
        # Extract the content of the script tag as text
        json_text = script.text
        # Step 4: Isolate the JSON data by splitting the string
        json_text = json_text.split("JSON.parse('")[1].split("')")[0]
        # Step 5: Decode the string by replacing escape characters
        json_text = json_text.encode('utf-8').decode('unicode_escape')
        # Step 6: Load the decoded text as a Python dictionary
        json_data = json.loads(json_text)
        break

data = []
for player in json_data:
    data.append({
        'player_name': player['player_name'],
        'games': player['games'],
        'time': player['time'],
        'goals': player['goals'],
        'xG': player['xG'],
        'assists': player['assists'],
        'xA': player['xA'],
        'shots': player['shots'],
        'key_passes': player['key_passes'],
        'yellow_cards': player['yellow_cards'],
        'red_cards': player['red_cards'],
        'position': player['position'],
        'team_title': player['team_title'],
        'npg': player['npg'],
        'npxG': player['npxG'],
        'xGChain': player['xGChain'],
        'xGBuildup': player['xGBuildup'],
    })

df = pd.DataFrame(data)
df

Unnamed: 0,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,Erling Haaland,31,2581,27,31.65399668365717,5,4.7517555598169565,122,29,1,0,F S,Manchester City,20,25.564646281301975,30.19725350290537,3.128645434975624
1,Cole Palmer,34,2640,22,17.83224501274526,11,11.869294486939907,108,72,7,0,F M S,"Chelsea,Manchester City",13,10.981725485995412,31.036520244553685,15.302413104102015
2,Alexander Isak,30,2305,21,22.07426631450653,2,3.6520401313900948,78,27,1,0,F S,Newcastle United,16,17.507253035902977,24.041151970624924,5.754798140376806
3,Dominic Solanke,38,3346,19,21.40683087334037,3,3.5437498800456524,109,39,3,0,F S,Bournemouth,17,19.12332433462143,22.5880119279027,4.080137750133872
4,Phil Foden,35,2895,19,11.307983005419374,8,8.51623285189271,105,73,2,0,F M S,Manchester City,19,11.307983005419374,30.41696721315384,16.99899828247726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,Josh Acheampong,1,1,0,0,0,0,0,0,0,0,S,Chelsea,0,0,0,0
566,Fred Onyedinma,8,419,0,0.18410924077033997,0,0.13074029795825481,5,2,1,0,D S,Luton,0,0.18410924077033997,0.5668405517935753,0.25199100375175476
567,Ethan Wheatley,3,9,0,0,0,0,0,0,0,0,S,Manchester United,0,0,0,0
568,Finley Munroe,1,1,0,0,0,0,0,0,0,0,S,Aston Villa,0,0,0,0


In [61]:
df['position'].value_counts()

position
D S        124
M S        117
S           82
F M S       65
F S         47
D M S       36
D           33
GK          30
M           11
D F M S      8
GK S         8
F M          4
F            2
D M          2
D F S        1
Name: count, dtype: int64

In [63]:
df[df['position'] == 'GK S']

Unnamed: 0,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
324,Lukasz Fabianski,10,706,0,0,0,0.0,0,0,0,0,GK S,West Ham,0,0,0.3157586343586445,0.3157586343586445
342,Thomas Strakosha,2,127,0,0,0,0.0,0,0,0,0,GK S,Brentford,0,0,0.0,0.0
382,Martin Dubravka,23,1983,0,0,0,0.1200264021754264,0,1,1,0,GK S,Newcastle United,0,0,3.5433348193764687,3.4233084097504616
386,Robin Olsen,5,398,0,0,0,0.0,0,0,0,0,GK S,Aston Villa,0,0,0.274754025042057,0.274754025042057
428,Wes Foderingham,30,2644,0,0,0,0.0650442615151405,0,1,2,0,GK S,Sheffield United,0,0,0.9430307522416116,0.877986490726471
430,Stefan Ortega Moreno,9,626,0,0,0,0.0,0,0,0,0,GK S,Manchester City,0,0,1.7157262079417706,1.7157262079417706
505,Daniel Bentley,5,379,0,0,0,0.0,0,0,0,0,GK S,Wolverhampton Wanderers,0,0,0.4679479077458381,0.4679479077458381
533,Djordje Petrovic,23,1982,0,0,0,0.0,0,0,2,0,GK S,Chelsea,0,0,3.312307056039572,3.312307056039572


In [102]:
import requests
import json
from pprint import pprint

base_url = 'https://fantasy.premierleague.com/api/'

url_bs = base_url + 'bootstrap-static/'
r = requests.get(url_bs).json()
pprint(r, indent=2, depth=1, compact=True)

{ 'element_stats': [...],
  'element_types': [...],
  'elements': [...],
  'events': [...],
  'game_settings': {...},
  'phases': [...],
  'teams': [...],
  'total_players': 10542514}


In [103]:
def get_gameweek_history(player_id):
    '''get all gameweek info for a given player_id'''
    
    # send GET request to
    # https://fantasy.premierleague.com/api/element-summary/{PID}/
    r = requests.get(
            base_url + 'element-summary/' + str(player_id) + '/'
    ).json()
    
    # extract 'history' data from response into dataframe
    df = pd.json_normalize(r['history'])
    
    return df


# show player #4's gameweek history
get_gameweek_history(4)[
    [
        'round',
        'total_points',
        'minutes',
        'goals_scored',
        'assists'
    ]
]

Unnamed: 0,round,total_points,minutes,goals_scored,assists
0,1,12,90,1,1
1,2,2,90,0,0
2,3,8,90,1,0
3,4,2,90,0,0
4,5,2,90,0,0
5,6,6,90,1,0
6,7,8,90,1,0


In [104]:
# create players dataframe
players = pd.json_normalize(r['elements'])

# create teams dataframe
teams = pd.json_normalize(r['teams'])

# get position information from 'element_types' field
positions = pd.json_normalize(r['element_types'])

# select columns of interest from players df
players = players[
    ['id', 'first_name', 'second_name', 'web_name', 'team',
     'element_type']
]

# join team name
players = players.merge(
    teams[['id', 'name','strength']],
    left_on='team',
    right_on='id',
    suffixes=['_player', None]
).drop(['team', 'id'], axis=1)

# join player positions
players = players.merge(
    positions[['id', 'singular_name_short']],
    left_on='element_type',
    right_on='id'
).drop(['element_type', 'id'], axis=1)

players.head()

Unnamed: 0,id_player,first_name,second_name,web_name,name,strength,singular_name_short
0,1,Fábio,Ferreira Vieira,Fábio Vieira,Arsenal,5,MID
1,2,Gabriel,Fernando de Jesus,G.Jesus,Arsenal,5,FWD
2,3,Gabriel,dos Santos Magalhães,Gabriel,Arsenal,5,DEF
3,4,Kai,Havertz,Havertz,Arsenal,5,FWD
4,5,Karl,Hein,Hein,Arsenal,5,GKP


In [105]:
teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,5,,False,0,1350,1380,1370,1370,1330,1390,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,3,,False,0,1120,1245,1110,1140,1130,1350,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,3,,False,0,1100,1100,1075,1100,1130,1105,127
3,94,0,,4,0,Brentford,0,0,0,BRE,3,,False,0,1100,1100,1105,1095,1100,1110,130
4,36,0,,5,0,Brighton,0,0,0,BHA,3,,False,0,1100,1100,1100,1105,1100,1100,131


In [106]:
from tqdm.auto import tqdm
tqdm.pandas()

# get gameweek histories for each player
points = players['id_player'].progress_apply(get_gameweek_history)

# combine results into single dataframe
points = pd.concat(df for df in points)

# join web_name
points = players[['id_player', 'web_name', 'name', 'strength']].merge(
    points,
    left_on='id_player',
    right_on='element'
).rename(columns={'name':'team_name', 'strength':'team_strength'})

100%|██████████| 666/666 [00:47<00:00, 14.03it/s]


In [107]:
points

Unnamed: 0,id_player,web_name,team_name,team_strength,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,1,Fábio Vieira,Arsenal,5,1,2,20,0,True,2024-08-17T14:00:00Z,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,0,2923,0,0
1,1,Fábio Vieira,Arsenal,5,1,11,2,0,False,2024-08-24T16:30:00Z,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,-790,2321,84,874
2,1,Fábio Vieira,Arsenal,5,1,21,5,0,True,2024-08-31T11:30:00Z,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-279,2397,355,634
3,1,Fábio Vieira,Arsenal,5,1,39,18,0,False,2024-09-15T13:00:00Z,0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-747,1650,0,747
4,1,Fábio Vieira,Arsenal,5,1,47,13,0,False,2024-09-22T15:30:00Z,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,54,-174,1494,0,174
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4536,655,Forbs,Wolves,3,655,41,2,0,False,2024-09-21T14:00:00Z,3,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,55,79,856,192,113
4537,655,Forbs,Wolves,3,655,60,12,0,True,2024-09-28T16:30:00Z,1,2,6,38,0,0,0,1,0,0,0,1,0,0,0,-1,0.0,0.8,9.0,0.9,0,0.11,0.01,0.12,0.98,55,-92,996,89,181
4538,655,Forbs,Wolves,3,655,63,4,1,False,2024-10-05T14:00:00Z,5,3,7,67,0,0,0,4,0,0,0,1,0,0,0,14,14.2,16.7,4.0,3.5,1,0.00,0.07,0.07,2.69,55,153,1382,318,165
4539,664,Pond,Wolves,3,664,60,12,0,True,2024-09-28T16:30:00Z,1,2,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.00,0.00,0.00,0.00,40,0,0,0,0


In [108]:
points = points.merge(teams[['id','name','strength']],
                      how='inner',
                      left_on='opponent_team',
                      right_on='id'
                      ).rename(columns={'name':'opponent_name','strength':'opponent_strength'})

In [115]:
points_red = points[[
    'web_name',
    'team_name',
    'team_strength',
    'opponent_name',
    'opponent_strength',
    'was_home',
    'team_h_score',
    'team_a_score',
    'round',
    'minutes',
    'goals_scored',
    'assists',
    'clean_sheets',
    'goals_conceded',
    'own_goals',
    'penalties_saved',
    'penalties_missed',
    'yellow_cards',
    'red_cards',
    'saves',
    'bonus',
    'expected_goals',
    'expected_assists',
    'expected_goal_involvements',
    'expected_goals_conceded'
]]

points_red.head()

Unnamed: 0,web_name,team_name,team_strength,opponent_name,opponent_strength,was_home,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,Fábio Vieira,Arsenal,5,Wolves,3,True,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
1,Fábio Vieira,Arsenal,5,Aston Villa,3,False,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
2,Fábio Vieira,Arsenal,5,Brighton,3,True,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
3,Fábio Vieira,Arsenal,5,Spurs,3,False,0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
4,Fábio Vieira,Arsenal,5,Man City,5,False,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0


In [118]:
points_red[points_red['web_name'] == 'A.Becker']

Unnamed: 0,web_name,team_name,team_strength,opponent_name,opponent_strength,was_home,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
2532,A.Becker,Liverpool,4,Ipswich,2,False,0,2,1,90,0,0,1,0,0,0,0,0,0,2,1,0.0,0.0,0.0,0.45
2533,A.Becker,Liverpool,4,Brentford,3,True,2,0,2,90,0,0,1,0,0,0,0,0,0,2,0,0.0,0.0,0.0,0.36
2534,A.Becker,Liverpool,4,Man Utd,3,False,0,3,3,90,0,0,1,0,0,0,0,0,0,3,1,0.0,0.0,0.0,1.36
2535,A.Becker,Liverpool,4,Nott'm Forest,3,True,0,1,4,90,0,0,0,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.44
2536,A.Becker,Liverpool,4,Bournemouth,3,True,3,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0
2537,A.Becker,Liverpool,4,Wolves,3,False,1,2,6,90,0,0,0,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.63
2538,A.Becker,Liverpool,4,Crystal Palace,3,False,0,1,7,78,0,0,1,0,0,0,0,0,0,4,3,0.0,0.0,0.0,0.5


In [120]:
current_round = points_red['round'].max()

points_l5 = points_red[points_red['round'] > current_round - 5]

cols_to_drop = ['opponent_name','opponent_strength','round']

points_l5.drop(columns=cols_to_drop, inplace=True)

for col in points_l5:
    if col in ['web_name','team_name','team_strength','was_home']:
        continue
    elif col in ['expected_goals','expected_assists','expected_goal_involvements','expected_goals_conceded']:
        points_l5[col] = points_l5[col].astype(float)
    else:
        points_l5[col] = points_l5[col].astype(int)

points_l5_agg = points_l5.groupby(by=['web_name',
                                      'team_name',
                                      'team_strength',
                                      'was_home'],
                                    as_index=False).sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points_l5.drop(columns=cols_to_drop, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points_l5[col] = points_l5[col].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points_l5[col] = points_l5[col].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead


Unnamed: 0,web_name,team_name,team_strength,was_home,team_h_score,team_a_score,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,A.Becker,Liverpool,4,False,1,6,258,0,0,2,1,0,0,0,0,0,9,4,0.0,0.0,0.0,2.49
1,A.Becker,Liverpool,4,True,3,1,90,0,0,0,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.44
2,A.Doucoure,Everton,3,False,4,3,90,0,0,0,1,0,0,0,0,0,0,0,0.02,0.01,0.03,0.66
3,A.Doucoure,Everton,3,True,4,4,187,0,0,1,4,0,0,0,0,0,0,0,0.0,0.05,0.05,4.54
4,A.Fatawu,Leicester,2,False,6,4,19,0,0,0,2,0,0,0,0,0,0,0,0.01,0.0,0.01,2.07


In [121]:
points_l5_agg.head()

Unnamed: 0,web_name,team_name,team_strength,was_home,team_h_score,team_a_score,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,A.Becker,Liverpool,4,False,1,6,258,0,0,2,1,0,0,0,0,0,9,4,0.0,0.0,0.0,2.49
1,A.Becker,Liverpool,4,True,3,1,90,0,0,0,1,0,0,0,0,0,2,0,0.0,0.0,0.0,0.44
2,A.Doucoure,Everton,3,False,4,3,90,0,0,0,1,0,0,0,0,0,0,0,0.02,0.01,0.03,0.66
3,A.Doucoure,Everton,3,True,4,4,187,0,0,1,4,0,0,0,0,0,0,0,0.0,0.05,0.05,4.54
4,A.Fatawu,Leicester,2,False,6,4,19,0,0,0,2,0,0,0,0,0,0,0,0.01,0.0,0.01,2.07


In [122]:
points_l5_agg.sort_values(by='expected_goals', ascending=False).head()

Unnamed: 0,web_name,team_name,team_strength,was_home,team_h_score,team_a_score,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
512,Havertz,Arsenal,5,True,8,4,270,3,0,0,4,0,0,0,0,0,0,4,3.43,0.13,3.56,2.8
885,N.Jackson,Chelsea,3,True,6,4,248,1,1,0,4,0,0,0,1,0,0,1,3.21,0.09,3.3,2.0
966,Palmer,Chelsea,3,True,6,4,270,4,2,0,4,0,0,0,2,0,0,7,2.91,1.42,4.33,2.4
496,Haaland,Man City,5,True,7,5,270,3,0,0,5,0,0,0,0,0,0,6,2.16,0.12,2.28,4.21
495,Haaland,Man City,5,False,2,4,180,3,0,0,2,0,0,0,0,0,0,3,2.16,0.21,2.37,2.3
