## Date of results 2022-03-19

In [1]:
import requests
import json
import pandas as pd
import time
import datetime
from bs4 import BeautifulSoup
import numpy as np

SLEEPING_TIME = 3


In [2]:
# this is used to get the best attacking players and the defending aptitude of each team
base_url = 'https://understat.com/league/EPL/'
url = base_url

res = requests.get(url)
soup = BeautifulSoup(res.content, 'html.parser')
scripts = soup.find_all('script')


In [3]:
"""
    params:
        - best_players_df: the best attacking players overall this season
        - scripts[3]
"""
matches = scripts[3]
strings = matches.string
start = strings.index("('")+2
end = strings.index("')")
json_data = strings[start:end]
json_data = json_data.encode('utf8').decode('unicode_escape')

data = json.loads(json_data)

columns = data[0].keys()
dc = {c:[] for c in columns}

for player in data:
    for c in columns:
        dc[c].append(player[c])
best_players_df = pd.DataFrame(dc)
best_players_df.head(3)

Unnamed: 0,id,player_name,games,time,goals,xG,assists,xA,shots,key_passes,yellow_cards,red_cards,position,team_title,npg,npxG,xGChain,xGBuildup
0,1250,Mohamed Salah,27,2265,20,21.47552203387022,10,7.872370472177863,115,51,1,0,F S,Liverpool,15,16.908509023487568,27.58283654600382,9.217943489551544
1,6854,Diogo Jota,26,1777,13,13.961289312690496,1,4.069446869194508,65,28,2,0,F S,Liverpool,13,13.961289312690496,20.360280767083168,5.880793909542263
2,647,Harry Kane,27,2329,12,13.987340405583382,3,6.052424713969231,91,29,3,0,F M S,Tottenham,10,12.465002793818712,21.49537392705679,6.383346496149898


## Best Attacking Players Overall

based on attacking rating 

In [4]:
best_players_df['xG'] = best_players_df['xG'].apply(lambda x: float(x))
best_players_df['xA'] = best_players_df['xA'].apply(lambda x: float(x))
best_players_df['goals'] = best_players_df['goals'].apply(lambda x: int(x))
best_players_df['assists'] = best_players_df['assists'].apply(lambda x: int(x))
best_players_df['shots'] = best_players_df['shots'].apply(lambda x: int(x))
best_players_df['attacking_rating'] = best_players_df.apply(lambda x: x['xG'] *1.2 + x['xA'], axis = 1)

columns = ['id', 'player_name', 'position', 'team_title', 'games', 'time', 'attacking_rating', 'xG', 'xA', 'goals', 'assists']
best_players_df[columns].sort_values(by='attacking_rating', ascending=False, ignore_index=True).head(25)

Unnamed: 0,id,player_name,position,team_title,games,time,attacking_rating,xG,xA,goals,assists
0,1250,Mohamed Salah,F S,Liverpool,27,2265,33.642997,21.475522,7.87237,20,10
1,647,Harry Kane,F M S,Tottenham,27,2329,22.837233,13.98734,6.052425,12,3
2,838,Sadio Mané,F S,Liverpool,26,2232,22.30169,14.840229,4.493415,12,1
3,6854,Diogo Jota,F S,Liverpool,26,1777,20.822994,13.961289,4.069447,13,1
4,2371,Cristiano Ronaldo,F S,Manchester United,24,1928,20.751624,14.229021,3.676798,12,3
5,1776,Jarrod Bowen,F M S,West Ham,28,2377,17.743991,10.571114,5.058655,8,8
6,531,Michail Antonio,F S,West Ham,28,2440,17.510238,10.042948,5.4587,8,7
7,618,Raheem Sterling,F M S,Manchester City,23,1561,17.15021,11.372314,3.503434,10,1
8,453,Son Heung-Min,F M,Tottenham,25,2181,17.089177,9.601111,5.567844,11,5
9,1228,Bruno Fernandes,F M S,Manchester United,27,2326,16.645349,7.061856,8.171122,9,6


## Choose the most suitable attacking players by looking at the worst defending teams

Get the teams that are facing opponents with weak defence in the next `NUMBER_OF_FUTURE_MATCHES`. Get the best players of the team by looking at their xGoals + xAssists in the past `NUMBER_OF_MATCHES` games.

Penalise the team if it concedes a goal at home. Every xgoal at home is multiplied by 1.3.
Boost the team if it makes a goal away. Every xgoal away is multiplied by 1.3.


In [5]:
"""
    params: 
        - matches_df: contains xGA for each team
"""
matches = scripts[2]
strings = matches.string
start = strings.index("('")+2
end = strings.index("')")
json_data = strings[start:end]
json_data = json_data.encode('utf8').decode('unicode_escape')

data = json.loads(json_data)
columns = ['h_a', 'xG', 'xGA', 'npxG', 'npxGA', 'scored','missed', 'xpts', 'result']

dc = {}
dc['team'] = []
for c in columns:
    dc[c] = []

team_list = []
for team_id in data:
    team_name = data[team_id]['title']
    team_list.append(team_name)
    for match in data[team_id]['history']:
        dc['team'].append(team_name)
        for c in columns:
            dc[c].append(match[c]) 
            
matches_df = pd.DataFrame(dc)


In [6]:
## number of games to look the xgoals against for each team
NUMBER_OF_GAMES = 8
print(f'Looking back at {NUMBER_OF_GAMES} games.')
matches_df['matches_home'] = matches_df.apply(lambda row: 1 if row['h_a'] == 'h' else 0, axis=1)
matches_df['matches_away'] = matches_df.apply(lambda row: 1 if row['h_a'] == 'a' else 0, axis=1)
matches_df['attack_home'] = matches_df.apply(lambda row: row['xG'] if row['h_a'] == 'h' else 0, axis=1)
matches_df['attack_away'] = matches_df.apply(lambda row: row['xG'] if row['h_a'] == 'a' else 0, axis=1)
matches_df['defence_home'] = matches_df.apply(lambda row: row['xGA'] if row['h_a'] == 'h' else 0, axis=1)
matches_df['defence_away'] = matches_df.apply(lambda row: row['xGA'] if row['h_a'] == 'a' else 0, axis=1)
defence_df = matches_df.groupby('team').tail(NUMBER_OF_GAMES).groupby('team', as_index=False).apply(
        lambda row: pd.Series(
            {
                'xG':sum(row.xG)
                , 'xGA': sum(row.xGA)
                , 'xpts': sum(row.xpts)
                , 'attack_home':sum(row.attack_home)
                , 'attack_away':sum(row.attack_away)
                , 'defence_home': sum(row.defence_home)
                , 'defence_away': sum(row.defence_away)
                , 'matches_home': sum(row.matches_home)
                , 'matches_away': sum(row.matches_away)
            }
        )
    )# .sort_values('defending_rating', ascending=False, ignore_index=True)[:]
defence_df['avg_home_att'] = defence_df['attack_home'] / defence_df['matches_home']
defence_df['avg_away_att'] = defence_df['attack_away'] / defence_df['matches_away']
defence_df['avg_home_def'] = defence_df['defence_home'] / defence_df['matches_home']
defence_df['avg_away_def'] = defence_df['defence_away'] / defence_df['matches_away']
defence_df['defending_rating'] = defence_df['avg_home_def'] * 1.3 + defence_df['avg_away_def']
defence_df['attacking_rating'] = defence_df['avg_away_att'] * 1.3 + defence_df['avg_home_att']
defence_df = defence_df.sort_values(by='defending_rating', ascending=False, ignore_index=True)
defence_df

Looking back at 8 games.


Unnamed: 0,team,xG,xGA,xpts,attack_home,attack_away,defence_home,defence_away,matches_home,matches_away,avg_home_att,avg_away_att,avg_home_def,avg_away_def,defending_rating,attacking_rating
0,Norwich,5.813548,20.69353,2.9264,2.392229,3.421319,10.0268,10.66673,4.0,4.0,0.598057,0.85533,2.5067,2.666682,5.925393,1.709986
1,Leeds,12.301163,17.270409,9.901,6.952028,5.349135,7.75462,9.515789,4.0,4.0,1.738007,1.337284,1.938655,2.378947,4.899199,3.476476
2,Leicester,9.866283,15.514985,8.9284,5.426159,4.440124,8.333448,7.181537,4.0,4.0,1.35654,1.110031,2.083362,1.795384,4.503755,2.79958
3,Everton,6.932686,13.165918,7.621,5.700906,1.23178,4.507298,8.65862,5.0,3.0,1.140181,0.410593,0.90146,2.886207,4.058104,1.673953
4,Brighton,8.657683,13.231417,7.8775,2.744003,5.91368,6.659008,6.572409,4.0,4.0,0.686001,1.47842,1.664752,1.643102,3.80728,2.607947
5,Wolverhampton Wanderers,8.558515,12.775281,8.4166,3.974762,4.583753,7.407796,5.367485,4.0,4.0,0.993691,1.145938,1.851949,1.341871,3.749405,2.48341
6,Southampton,13.614456,11.94866,12.0995,9.87467,3.739786,4.35994,7.58872,5.0,3.0,1.974934,1.246595,0.871988,2.529573,3.663158,3.595508
7,Burnley,7.28319,11.868665,7.2576,5.380663,1.902527,8.565125,3.30354,5.0,3.0,1.076133,0.634176,1.713025,1.10118,3.328113,1.900561
8,Tottenham,14.663802,11.640443,12.3788,5.823847,8.839955,3.811453,7.82899,3.0,5.0,1.941282,1.767991,1.270484,1.565798,3.217428,4.239671
9,West Ham,9.636466,10.836966,10.1518,7.315081,2.321385,5.482816,5.35415,5.0,3.0,1.463016,0.773795,1.096563,1.784717,3.210249,2.46895


## Get player data from team page & future matches for each team

To restrict the search space we just consider players that had at least xTotal = xgoals + xassists the whole season

In [7]:
"""
    params: 
        - team_data:
        - df:
        - scripts[1]: contains a list of matches of a team (future and past for the whole season)
            - isResult: if the match was done
            - 
        - scripts[3]:
        - xTotal
"""
team_rosters = {}
teams = ['Aston Villa', 'Everton', 'Southampton', 'Leicester', 'Crystal Palace', 'Norwich', 'Chelsea', 'West Ham', 'Tottenham', 'Arsenal', 'Newcastle United', 'Liverpool', 'Manchester City', 'Manchester United', 'Watford', 'Burnley', 'Brighton', 'Wolverhampton Wanderers', 'Brentford', 'Leeds']
teams.sort()

# Number of future matches for which we want to see the defence of the next teams
NUMBER_OF_FUTURE_MATCHES = 6
# Threshold of x goals + x assists that a player needs to have the whole season to be listed our shortlist
xTotal = 7
# we need this variable to compare the current datetime to decide if a match is next or not
now = datetime.datetime.now()

future_matches_dc = {
    'team':[], 
    'teams_against':[], 
    'home_or_away':[], 
    'when': [], 
    'defence_of_team_against':[], 
    'attack_of_team_against':[],
    'defending_rating':[],
    'attacking_rating':[],
}

df = pd.DataFrame()

for team in teams:
    team_name_url = team.replace(' ', '%20')
    base_url = f'https://understat.com/team/{team_name_url}/2021'
    url = base_url
    print(url)

    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'html.parser')
    scripts = soup.find_all('script')
    
    script_data = scripts[1]
    strings = script_data.string
    start = strings.index("('")+2
    end = strings.index("')")
    json_data = strings[start:end]
    json_data = json_data.encode('utf8').decode('unicode_escape')
    data = json.loads(json_data)
    
    total_matches = 0
    for match in data:
        date_diff = (now - datetime.datetime.strptime(match['datetime'], '%Y-%m-%d %H:%M:%S')).days
        if date_diff < 0 and not match['isResult']:
            if match['h']['title'] == team:            
                team_against = match['a']['title']
                home_or_away = 'Home'
            else:
                team_against = match['h']['title']
                home_or_away = 'Away'
            # print(f"Team {team} is playing {home_or_away} against {team_against} at {match['datetime']}")

            future_matches_dc['team'].append(team)
            future_matches_dc['teams_against'].append(team_against)
            future_matches_dc['home_or_away'].append(home_or_away)
            future_matches_dc['when'].append(match['datetime'])

            row_against = defence_df[defence_df['team'] == team_against]
            row_team = defence_df[defence_df['team'] == team]

            if home_or_away == 'Home':
                future_matches_dc['defence_of_team_against'].append(row_against['avg_away_def'].iloc[0])
                future_matches_dc['attack_of_team_against'].append(row_against['avg_away_att'].iloc[0])
                future_matches_dc['attacking_rating'].append(row_team['avg_home_att'].iloc[0])
                future_matches_dc['defending_rating'].append(row_team['avg_home_def'].iloc[0])
            else:
                future_matches_dc['defence_of_team_against'].append(row_against['avg_home_def'].iloc[0])
                future_matches_dc['attack_of_team_against'].append(row_against['avg_home_att'].iloc[0])
                future_matches_dc['attacking_rating'].append(row_team['avg_away_att'].iloc[0])
                future_matches_dc['defending_rating'].append(row_team['avg_away_def'].iloc[0])

            total_matches += 1
            if total_matches >= NUMBER_OF_FUTURE_MATCHES:
                break

    script_data = scripts[3]
    strings = script_data.string
    start = strings.index("('")+2
    end = strings.index("')")
    json_data = strings[start:end]
    json_data = json_data.encode('utf8').decode('unicode_escape')
    data = json.loads(json_data)

    columns = data[0].keys()
    dc = {c:[] for c in columns}

    for player in data:
        for c in columns:
            dc[c].append(player[c])

    team_data = pd.DataFrame(dc)
    team_rosters[team] = team_data
    team_data['team'] = team
    team_data['xG'] = team_data['xG'].astype(float)
    team_data['xA'] = team_data['xA'].astype(float)
    team_data['xTotal'] = team_data['xG'] + team_data['xA']
    df = df.append(team_data.loc[team_data['xTotal'] > 4, ['id', 'player_name', 'team', 'xG', 'xA', 'xTotal', 'goals', 'assists']])
    time.sleep(SLEEPING_TIME)
    
future_matches_df = pd.DataFrame(future_matches_dc)
df = df.reset_index(drop=True)
print(f'Total players to receive: {df.shape[0]}')

https://understat.com/team/Arsenal/2021
https://understat.com/team/Aston%20Villa/2021
https://understat.com/team/Brentford/2021
https://understat.com/team/Brighton/2021
https://understat.com/team/Burnley/2021
https://understat.com/team/Chelsea/2021
https://understat.com/team/Crystal%20Palace/2021
https://understat.com/team/Everton/2021
https://understat.com/team/Leeds/2021
https://understat.com/team/Leicester/2021
https://understat.com/team/Liverpool/2021
https://understat.com/team/Manchester%20City/2021
https://understat.com/team/Manchester%20United/2021
https://understat.com/team/Newcastle%20United/2021
https://understat.com/team/Norwich/2021
https://understat.com/team/Southampton/2021
https://understat.com/team/Tottenham/2021
https://understat.com/team/Watford/2021
https://understat.com/team/West%20Ham/2021
https://understat.com/team/Wolverhampton%20Wanderers/2021
Total players to receive: 118


## Show Teams that have future matches against teams with bad defence

In [8]:
future_matches_df = pd.DataFrame(future_matches_dc)
future_matches_df = future_matches_df.groupby('team', as_index=False).apply(
        lambda row: pd.Series(
            {
                'teams_against': list(row.teams_against),
                'home_or_away': list(row.home_or_away),
                'when': list(row.when),
                'sum_of_defence_against': sum(row.defence_of_team_against),
                'sum_of_attack_against': sum(row.attack_of_team_against),
                'defending_rating': sum(row.defending_rating),
                'attacking_rating': sum(row.attacking_rating)
            }
        )
    ).sort_values(by='sum_of_defence_against', ignore_index=True, ascending=False)

future_matches_df

Unnamed: 0,team,teams_against,home_or_away,when,sum_of_defence_against,sum_of_attack_against,defending_rating,attacking_rating
0,Aston Villa,"[Wolverhampton Wanderers, Tottenham, Liverpool...","[Away, Home, Home, Away, Home, Away]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",10.868452,7.853214,7.318295,7.331911
1,Brighton,"[Norwich, Arsenal, Tottenham, Southampton, Wol...","[Home, Away, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",10.865619,8.410392,9.923563,6.493262
2,Liverpool,"[Manchester United, Watford, Manchester City, ...","[Home, Home, Away, Away, Home, Away]","[2022-03-20 16:30:00, 2022-04-02 03:00:00, 202...",9.613033,7.897357,4.983811,15.463148
3,Watford,"[Everton, Liverpool, Leeds, Brentford, Manches...","[Home, Away, Home, Home, Away, Home]","[2022-03-20 14:00:00, 2022-04-02 03:00:00, 202...",9.325858,8.82702,7.698208,6.485191
4,Tottenham,"[West Ham, Newcastle United, Aston Villa, Brig...","[Home, Home, Away, Home, Away, Home]","[2022-03-20 16:30:00, 2022-04-02 03:00:00, 202...",9.084292,7.208275,8.213533,11.301111
5,Newcastle United,"[Crystal Palace, Tottenham, Wolverhampton Wand...","[Home, Away, Home, Home, Away, Home]","[2022-03-20 14:00:00, 2022-04-02 03:00:00, 202...",9.077627,7.86955,5.873041,8.394603
6,Arsenal,"[Crystal Palace, Brighton, Southampton, Manche...","[Away, Home, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.973681,8.795991,4.625733,8.917099
7,Manchester City,"[Burnley, Liverpool, Wolverhampton Wanderers, ...","[Away, Home, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.906025,7.489024,4.532242,13.497197
8,Brentford,"[Leicester, Chelsea, West Ham, Watford, Totten...","[Away, Away, Home, Away, Home, Away]","[2022-03-20 14:00:00, 2022-04-02 03:00:00, 202...",8.278622,8.434866,8.046345,7.275662
9,Chelsea,"[Brentford, Southampton, Leeds, West Ham, Ever...","[Home, Away, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.264781,7.880412,4.816768,7.538485


## fetch player data

In [9]:
## number of matches to look the form
NUMBER_OF_MATCHES = 5

player_data_all = pd.DataFrame()
for i in range(len(df)):
    player_id = df.at[i, 'id']
    player_name = df.at[i, 'player_name']
    team = df.at[i, 'team']
    print(player_id, player_name, team)
    
    base_url = f'https://understat.com/player/{player_id}'
    url = base_url
    print(url)

    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'html.parser')
    scripts = soup.find_all('script')
    script_data = scripts[4]
    strings = script_data.string
    start = strings.index("('")+2
    end = strings.index("')")
    json_data = strings[start:end]
    json_data = json_data.encode('utf8').decode('unicode_escape')
    data = json.loads(json_data)

    # get data for every player for the past 5 matches
    
    columns = ['time', 'xG', 'xA', 'goals', 'assists', 'key_passes', 'shots', 'h_team', 'a_team', 'date']
    dc = {c:[] for c in columns}

    for match in data[:NUMBER_OF_MATCHES]:
        for c in columns:
            dc[c].append(match[c])
    player_data = pd.DataFrame(dc)
    player_data['player_id'] = player_id
    player_data['player_name'] = player_name
    player_data['team'] = team
    player_data['NUMBER_OF_MATCHES'] = NUMBER_OF_MATCHES
    player_data['time'] = player_data['time'].astype(float)
    player_data['xG'] = player_data['xG'].astype(float)
    player_data['xA'] = player_data['xA'].astype(float)
    player_data['goals'] = player_data['goals'].astype(float)
    player_data['assists'] = player_data['assists'].astype(float)
    player_data['key_passes'] = player_data['key_passes'].astype(float)
    player_data['shots'] = player_data['shots'].astype(float)
    player_data['team_played_against'] =  player_data.apply(lambda row: row['h_team'] if row['h_team'] != row['team'] else row['a_team'], axis=1)
    
    player_data_all = player_data_all.append(player_data)
    time.sleep(SLEEPING_TIME)


7230 Emile Smith-Rowe Arsenal
https://understat.com/player/7230
7322 Bukayo Saka Arsenal
https://understat.com/player/7322
2517 Martin Odegaard Arsenal
https://understat.com/player/2517
7752 Gabriel Martinelli Arsenal
https://understat.com/player/7752
318 Pierre-Emerick Aubameyang Arsenal
https://understat.com/player/318
3277 Alexandre Lacazette Arsenal
https://understat.com/player/3277
8865 Ollie Watkins Aston Villa
https://understat.com/player/8865
8941 Jacob Ramsey Aston Villa
https://understat.com/player/8941
986 Danny Ings Aston Villa
https://understat.com/player/986
488 Philippe Coutinho Aston Villa
https://understat.com/player/488
7723 John McGinn Aston Villa
https://understat.com/player/7723
8864 Matthew Cash Aston Villa
https://understat.com/player/8864
2203 Emiliano Buendía Aston Villa
https://understat.com/player/2203
998 Ivan Toney Brentford
https://understat.com/player/998
6552 Bryan Mbeumo Brentford
https://understat.com/player/6552
1078 Sergi Canos Brentford
https://unde

In [10]:
agg = player_data_all.groupby(['player_id', 'player_name', 'team', 'NUMBER_OF_MATCHES'], as_index=False).apply(
        lambda row: pd.Series(
            {
                'total_minutes_played':sum(row.time),
                'avg_minutes_played': np.mean(row.time),
                'xG':sum(row.xG),
                'xA':sum(row.xA),
                'key_passes': sum(row.key_passes),
                'shots':sum(row.shots),
                'goals':sum(row.goals),
                'assists':sum(row.assists),
                'team_played_against':list(row.team_played_against),
                'date_played': list(row.date),
                'oldest_match': min(row.date),
                'latest_match': max(row.date)
            }
        )
    )

agg['latest_match'] = agg['latest_match'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))
today = datetime.datetime.today()
agg['days_since_last_match'] = agg['latest_match'].apply(lambda x: (today - x).days)
agg['xTotal'] = agg['xG'] + agg['xA']
agg['date_played'] = agg['date_played'].apply(lambda x: sorted(x, reverse=True))
agg = agg.sort_values(by='xTotal', ascending=False)
agg.head(20)


Unnamed: 0,player_id,player_name,team,NUMBER_OF_MATCHES,total_minutes_played,avg_minutes_played,xG,xA,key_passes,shots,goals,assists,team_played_against,date_played,oldest_match,latest_match,days_since_last_match,xTotal
3,1250,Mohamed Salah,Liverpool,5,361.0,72.2,5.180809,1.344369,9.0,30.0,4.0,1.0,"[Arsenal, Brighton, West Ham, Leeds, Norwich]","[2022-03-16, 2022-03-12, 2022-03-05, 2022-02-2...",2022-02-19,2022-03-16,3,6.525177
14,2371,Cristiano Ronaldo,Manchester United,5,445.0,89.0,3.720493,1.915097,7.0,21.0,4.0,0.0,"[Tottenham, Watford, Leeds, Brighton, Southamp...","[2022-03-12, 2022-02-26, 2022-02-20, 2022-02-1...",2022-02-12,2022-03-12,7,5.63559
117,998,Ivan Toney,Brentford,5,385.0,77.0,3.841804,1.060781,7.0,13.0,7.0,0.0,"[Burnley, Norwich, Newcastle United, Wolverham...","[2022-03-12, 2022-03-05, 2022-02-26, 2022-01-2...",2022-01-19,2022-03-12,7,4.902586
60,647,Harry Kane,Tottenham,5,450.0,90.0,3.105973,1.767844,9.0,17.0,5.0,1.0,"[Brighton, Manchester United, Everton, Leeds, ...","[2022-03-16, 2022-03-12, 2022-03-07, 2022-02-2...",2022-02-23,2022-03-16,3,4.873816
2,1228,Bruno Fernandes,Manchester United,5,450.0,90.0,2.594943,1.878678,12.0,16.0,2.0,1.0,"[Manchester City, Watford, Leeds, Brighton, So...","[2022-03-06, 2022-02-26, 2022-02-20, 2022-02-1...",2022-02-12,2022-03-06,13,4.473622
83,7700,Che Adams,Southampton,5,444.0,88.8,3.051829,1.077315,5.0,11.0,1.0,2.0,"[Watford, Newcastle United, Aston Villa, Norwi...","[2022-03-13, 2022-03-10, 2022-03-05, 2022-02-2...",2022-02-19,2022-03-13,6,4.129145
56,618,Raheem Sterling,Manchester City,5,391.0,78.2,3.516729,0.396984,5.0,10.0,3.0,0.0,"[Everton, Tottenham, Norwich, Brentford, South...","[2022-02-26, 2022-02-19, 2022-02-12, 2022-02-0...",2022-01-22,2022-02-26,21,3.913713
90,8026,Raphinha,Leeds,5,450.0,90.0,2.087959,1.762472,13.0,11.0,0.0,1.0,"[Norwich, Aston Villa, Leicester, Tottenham, L...","[2022-03-13, 2022-03-10, 2022-03-05, 2022-02-2...",2022-02-23,2022-03-13,6,3.850431
97,838,Sadio Mané,Liverpool,5,450.0,90.0,3.417831,0.41374,7.0,17.0,4.0,0.0,"[Arsenal, Brighton, West Ham, Leeds, Norwich]","[2022-03-16, 2022-03-12, 2022-03-05, 2022-02-2...",2022-02-19,2022-03-16,3,3.831572
21,318,Pierre-Emerick Aubameyang,Arsenal,5,349.0,69.8,3.24126,0.277235,2.0,12.0,4.0,0.0,"[Barcelona, Elche, Barcelona, Valencia, Espanyol]","[2022-03-13, 2022-03-06, 2022-02-27, 2022-02-2...",2022-02-13,2022-03-13,6,3.518495


## Final Decision Making

In [11]:
future_matches_df.sort_values(by='sum_of_attack_against')

Unnamed: 0,team,teams_against,home_or_away,when,sum_of_defence_against,sum_of_attack_against,defending_rating,attacking_rating
11,Norwich,"[Brighton, Burnley, Manchester United, Newcast...","[Away, Home, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.088371,6.703817,15.520148,4.360161
12,West Ham,"[Tottenham, Everton, Brentford, Burnley, Chels...","[Away, Home, Away, Home, Away, Home]","[2022-03-20 16:30:00, 2022-04-02 03:00:00, 202...",7.965726,6.776802,8.64384,6.710434
4,Tottenham,"[West Ham, Newcastle United, Aston Villa, Brig...","[Home, Home, Away, Home, Away, Home]","[2022-03-20 16:30:00, 2022-04-02 03:00:00, 202...",9.084292,7.208275,8.213533,11.301111
7,Manchester City,"[Burnley, Liverpool, Wolverhampton Wanderers, ...","[Away, Home, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.906025,7.489024,4.532242,13.497197
15,Southampton,"[Leeds, Chelsea, Arsenal, Brighton, Crystal Pa...","[Away, Home, Home, Away, Home, Away]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",7.51652,7.663737,10.204684,9.664588
0,Aston Villa,"[Wolverhampton Wanderers, Tottenham, Liverpool...","[Away, Home, Home, Away, Home, Away]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",10.868452,7.853214,7.318295,7.331911
5,Newcastle United,"[Crystal Palace, Tottenham, Wolverhampton Wand...","[Home, Away, Home, Home, Away, Home]","[2022-03-20 14:00:00, 2022-04-02 03:00:00, 202...",9.077627,7.86955,5.873041,8.394603
9,Chelsea,"[Brentford, Southampton, Leeds, West Ham, Ever...","[Home, Away, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",8.264781,7.880412,4.816768,7.538485
2,Liverpool,"[Manchester United, Watford, Manchester City, ...","[Home, Home, Away, Away, Home, Away]","[2022-03-20 16:30:00, 2022-04-02 03:00:00, 202...",9.613033,7.897357,4.983811,15.463148
14,Burnley,"[Manchester City, Norwich, West Ham, Wolverham...","[Home, Away, Away, Home, Away, Home]","[2022-04-02 03:00:00, 2022-04-09 03:00:00, 202...",7.538608,7.923425,8.442615,5.130925


Get the best 3 players for each of the 5 teams that play against with the worst combined defences. Exclude players that have not played in the past 15 days

In [12]:
PLAYERS_TO_SHOW = 3
TEAMS_SELECTED = 20
DAYS_NOT_PLAYED = 15

selected_teams = [team for team in future_matches_df[:TEAMS_SELECTED].team]
selected_players_for_squad = agg.loc[(agg['days_since_last_match'] < DAYS_NOT_PLAYED) & (agg['team'].isin(selected_teams))].groupby('team').tail(PLAYERS_TO_SHOW).sort_values(by='xTotal', ascending=False).drop(['team_played_against', 'date_played'],axis=1).reset_index(drop=True)
daten = selected_players_for_squad.merge(future_matches_df[['team', 'sum_of_defence_against', 'attacking_rating']])#.drop(['NUMBER_OF_MATCHES', 'total_minutes_played', 'player_id', 'key_passes', 'shots'], axis = 1)
daten[['player_name', 'team', 'xTotal', 'xG', 'xA', 'goals', 'assists', 'sum_of_defence_against', 'attacking_rating', 'oldest_match', 'latest_match' ,'avg_minutes_played']].sort_values(by='xTotal', ascending=False)

Unnamed: 0,player_name,team,xTotal,xG,xA,goals,assists,sum_of_defence_against,attacking_rating,oldest_match,latest_match,avg_minutes_played
0,Teemu Pukki,Norwich,2.820347,2.187719,0.632628,2.0,1.0,8.088371,4.360161,2022-02-19,2022-03-13,90.0
1,Sergio Reguilón,Tottenham,2.52793,1.347763,1.180167,1.0,0.0,9.084292,11.301111,2022-01-19,2022-03-16,77.2
4,Jack Grealish,Manchester City,2.408249,0.814827,1.593422,0.0,0.0,8.906025,13.497197,2022-01-15,2022-03-14,76.2
7,Paul Pogba,Manchester United,2.379605,0.21788,2.161725,0.0,2.0,8.256241,11.365244,2022-02-15,2022-03-12,67.4
5,Riyad Mahrez,Manchester City,2.256371,2.147341,0.10903,3.0,0.0,8.906025,13.497197,2022-02-12,2022-03-14,59.2
2,Matt Doherty,Tottenham,2.117993,0.98779,1.130204,1.0,3.0,9.084292,11.301111,2022-02-19,2022-03-16,71.4
10,Martin Odegaard,Arsenal,1.968677,0.707808,1.260869,1.0,0.0,8.973681,8.917099,2022-02-24,2022-03-19,85.2
13,Ryan Fraser,Newcastle United,1.902598,0.996764,0.905834,1.0,2.0,9.077627,8.394603,2022-02-26,2022-03-17,69.4
16,Jack Harrison,Leeds,1.835557,0.86444,0.971117,1.0,0.0,7.229477,9.225872,2022-02-26,2022-03-18,63.0
17,Rodrigo,Leeds,1.820321,1.326784,0.493538,2.0,0.0,7.229477,9.225872,2022-02-26,2022-03-18,61.2
