# FM 24 Moneyball

In [21]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

# Ignore warnings
# NOTE: with no category/module filter this silences every warning for the
# whole session, including deprecation notices from the imported libraries.
import warnings
warnings.filterwarnings('ignore')

# Raise the pandas display cap to 500 columns so wide frames are shown without truncation
pd.set_option('display.max_columns', 500) 

# Render matplotlib figures inline, below the cell that produces them
%matplotlib inline

## Ingest and clean data

In [59]:
### Read raw data

def clean_column_names(columns):
    """Return normalised versions of the given column labels.

    Each label is lower-cased, stripped of surrounding whitespace, and has
    its spaces and hyphens replaced by underscores, so both exports end up
    with the same naming scheme.
    """
    return [label.lower().strip().replace(' ', '_').replace('-', '_') for label in columns]


# Select file path for raw data
# NOTE(review): the backslash separators only resolve on Windows; the literal is
# left untouched here in case later cells reuse `path`.
path = r'data\Man Utd Save\Winter 2023\Players Search - Winter 2023.html'

# Ingest in a dataframe (read_html returns one frame per table; the first one is used)
df_players_raw = pd.read_html(path, encoding='utf-8')[0]

# Format column names
df_players_raw.columns = clean_column_names(df_players_raw.columns)

# ---------------------------------------------------------------------------------------------------------------------

### Read processed Data lab data

# Select file path for processed data
path = r'data\Man Utd Save\Winter 2023\Players Search - Winter 2023.csv'

# Ingest in a dataframe
df_players = pd.read_csv(path)

# Format column names
df_players.columns = clean_column_names(df_players.columns)

# Bring ID from raw data
# The rows are matched on these four descriptive attributes.
# NOTE(review): this is the default inner join, so players without an exact
# match are dropped and duplicated key combinations in the raw export would
# multiply rows -- consider checking row counts or passing `validate=`.
player_keys = ['name', 'age', 'height', 'weight']
df_players = df_players.merge(df_players_raw[player_keys + ['uid']], on=player_keys)

# Expose the raw `uid` as the leading `id` column. This is done explicitly
# instead of via reset_index()/rename('level_0'), which only produced an `id`
# column because the CSV happens to contain an `index` column already.
df_players.insert(0, 'id', df_players.pop('uid'))

In [62]:
# Preview the first 10 rows of the player table
df_players.head(10)

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,goals,goals/90,minutes/goal,xg,xg/90,xg/shot,xg_overperformance,xg_overperformance/90,non_pen_goals,non_pen_goals/90,non_pen_goals/shot,minutes/non_pen_goal,non_pen_xg,non_pen_xg/90,non_pen_goals___non_pens_xg_/90,non_pen_xg/shot,non_pen_xg_overperformance,non_pen_xg_overperformance/90,goals_outside_box,goals_outside_box/90,assists,assists/90,minutes/assist,xa,xa/90,xa_overperformance,xa_overperformance/90,assists/clear_cut_chances_created,goal_contributions,goal_contributions/90,xgc,xgc/90,xgc_overperformance,xgc_overperformance/90,non_pen_goal_contributions,non_pen_goal_contributions/90,non_pen_xgc,non_pen_xgc/90,non_pen_xgc_overperformance,non_pen_xgc_overperformance/90,conversion_%,shots,shots/90,shots_on_target,shots_on_target/90,shots_on_target_%,shots_outside_box/90,passes_attempted,passes_attempted/90,passes_completed,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_passes,progressive_pass_rate,key_passes,key_passes/90,key_pass_%,open_play_key_passes,open_play_key_passes/90,open_play_key_pass_%,crosses_attempted,crosses_attempted/90,crosses_completed,crosses_completed/90,crosses_completed_%,open_play_crosses_attempted,open_play_crosses_attempted/90,open_play_crosses_completed,open_play_crosses_completed/90,open_play_cross_completion_%,chances_created,chances_created/90,clear_cut_chances_created,clear_cut_chances_created/90,pressures_attempted,pressures_attempted/90,pressures_completed,pressures_completed/90,pressure_success_%,possession_won/90,possession_lost/90,poss+_/90,poss+__%,dribbles/90,dribbles,penalties_taken,penalties_scored,pens_scored_%,tackles_attempted,tackles_attempted/90,tackles_completed,tackles_completed/90,tackles_failed,tackle_completion_%,tackles_failed/90,key_tackles,key_tackles/90,tac
kle_quality,interceptions,interceptions/90,blocks,blocks/90,shots_blocked,shots_blocked/90,headers_attempted,headers_attempted/90,headers_won,headers_won/90,headers_won_%,headers_lost,headers_lost/90,headers_lost_%,key_headers,key_headers/90,aerial_challenges_attempted/90,duels_win_%,fouls_against,fouls_made,net_fouls,fouls_won/90,fouls_committed/90,clearances,clearances/90,offsides,offsides/90,offside/non_pen_goals,offside/non_pen_xg,distance_covered/90,distance_covered,total_saves,saves/90,save_%,xsave_%,xsave_%_overperformance,saves_held,saves_held/90,saves_held_ratio,saves_held/shots_faced_ratio,saves_tipped,saves_tipped/90,saves_tipped_ratio,saves_tipped/shots_faced_ratio,saves_parried,saves_parried/90,saves_parried_ratio,saves_parried/shots_faced_ratio,saves/goal_conceaded,save_efficiency,shots_on_target_against,shots_on_target_against/90,xgp,xgp/90,penalties_faced,penalties_saved,pens_saved_%,goals_conceded,conceded/90,clean_sheets,clean_sheet_ratio,red_cards,yellow_cards,yellows/tackle,reds/tackle,yellows/90,reds/90,player_of_the_match,mistakes_leading_to_goal,sprints/90,attacking_actions/90,creative_actions/90,defensive_actions/90,goalkeeping_actions/90,excitement_factor/90,general_performance,goalkeeping,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister
0,93070286,Alexander Isak,ST (C),24,190 cm,74 kg,-,Newcastle,Premier League,SWE (ERI),-,Fairly Ambitious,Media-friendly,"$150,000 p/w",$90M - $131M,-,Right,19,1602,6.84,2,76.29,0,7,0.39,228.86,6.52,0.37,0.22,0.48,0.03,6,0.34,0.2,267.0,5.76,0.32,0.02,0.19,0.24,0.01,1,0.06,3,0.17,534.0,2.09,0.12,0.91,0.05,0.0,10,0.56,8.61,0.48,1.39,0.08,9,0.51,7.85,0.44,1.15,0.06,23,30,1.69,17,0.96,57,0.39,317,17.81,262,14.72,83,1.12,20,6.31,10,0.56,3.15,10,0.56,3.15,20,1.12,1,0.06,5,20,1.12,1,0.06,5.0,8,0.43,4,0.22,119,6.69,39,2.19,32.77,3.82,6.74,-2.92,-43.32,1.35,24,1,1,100,22,1.24,15,0.84,7,68,0.39,0,0.0,0.73,21,1.18,2,0.11,0,0.0,143,8.03,52,2.92,36.36,91,5.11,63.64,2,0.11,8.03,40.61,24,30,6,1.35,1.69,5,0.28,27,1.52,4.5,4.69,12.56,223.6,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,1,0.05,0.0,0.06,0.0,1,0,7.02,2.81,1.01,7.75,0.0,4.94,57,0,6,23,68,70,86,59,77,78,83,90,64,18,64
1,27132909,Mileta Rajović,ST (C),24,189 cm,80 kg,ESC,Watford,Sky Bet Championship,MNE (SRB),-,-,Scouting Required,"$18,750 p/w",$3.7M - $7.6M,-,Right,7,927,7.06,15,42.14,1,11,1.07,84.27,5.82,0.57,0.22,5.18,0.5,11,1.07,0.41,84.27,5.82,0.57,0.5,0.22,5.18,0.5,0,0.0,0,0.0,0.0,0.94,0.09,-0.94,-0.09,0.0,11,1.07,6.76,0.66,4.24,0.41,11,1.07,6.76,0.66,4.24,0.41,41,27,2.62,17,1.65,63,0.68,242,23.5,212,20.58,88,0.78,8,3.31,6,0.58,2.48,6,0.58,2.48,7,0.68,0,0.0,0,7,0.68,0,0.0,0.0,2,0.23,2,0.19,78,7.57,18,1.75,23.08,4.08,8.16,-4.08,-50.0,1.26,13,0,0,0,20,1.94,14,1.36,6,70,0.58,0,0.0,1.25,8,0.78,0,0.0,0,0.0,117,11.36,54,5.24,46.15,63,6.12,53.85,0,0.0,11.36,49.64,14,16,2,1.36,1.55,3,0.29,12,1.17,1.09,2.06,13.04,134.3,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,1,0.05,0.0,0.1,0.0,1,1,7.28,3.92,0.78,8.37,0.0,5.34,37,0,16,14,39,68,85,52,65,68,73,100,60,17,28
2,27160181,Rasmus Højlund,ST (C),20,191 cm,79 kg,-,Man Utd,Premier League,DEN,-,Spirited,"Evasive, Reserved","$100,000 p/w",$119M - $138M,-,Left,14,1223,6.68,8,55.59,2,1,0.07,1223.0,3.14,0.23,0.14,-2.14,-0.16,1,0.07,0.05,1223.0,3.14,0.23,-0.16,0.14,-2.14,-0.16,0,0.0,1,0.07,1223.0,0.58,0.04,0.42,0.03,0.0,2,0.15,3.72,0.27,-1.72,-0.13,2,0.15,3.72,0.27,-1.72,-0.13,5,22,1.62,15,1.1,68,0.29,278,20.46,241,17.74,87,0.81,11,3.96,6,0.44,2.16,6,0.44,2.16,6,0.44,0,0.0,0,6,0.44,0,0.0,0.0,3,0.19,2,0.15,66,4.86,15,1.1,22.73,3.61,6.11,-2.5,-40.92,1.4,19,0,0,0,18,1.32,15,1.1,3,83,0.22,0,0.0,1.29,10,0.74,1,0.07,0,0.0,102,7.51,50,3.68,49.02,52,3.83,50.98,4,0.29,7.51,54.17,12,14,2,0.88,1.03,6,0.44,11,0.81,11.0,3.5,12.55,170.6,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,7.43,2.52,0.66,8.52,0.0,4.05,24,0,22,12,33,37,56,29,42,30,50,64,76,19,37
3,53143395,Jørgen Strand Larsen,ST (C),23,193 cm,79 kg,-,Celta,LaLiga EA Sports,NOR,-,Ambitious,Media-friendly,"$20,500 p/w",$34M - $41M,-,Right,18,1535,7.07,0,85.28,3,12,0.7,127.92,8.05,0.47,0.23,3.95,0.23,12,0.7,0.34,127.92,6.53,0.38,0.32,0.19,5.47,0.32,2,0.12,2,0.12,767.5,0.98,0.06,1.02,0.06,0.0,14,0.82,9.03,0.53,4.97,0.29,14,0.82,7.51,0.44,6.49,0.38,34,35,2.05,22,1.29,63,0.47,383,22.46,315,18.47,82,0.88,15,3.92,8,0.47,2.09,8,0.47,2.09,6,0.35,0,0.0,0,6,0.35,0,0.0,0.0,4,0.23,4,0.23,102,5.98,17,1.0,16.67,3.87,6.86,-2.99,-43.59,1.35,23,2,0,0,22,1.29,14,0.82,8,64,0.47,0,0.0,1.83,24,1.41,3,0.18,2,0.12,189,11.08,63,3.69,33.33,126,7.39,66.67,2,0.12,11.08,36.49,27,12,-15,1.58,0.7,2,0.12,7,0.41,0.58,1.07,12.63,215.4,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,3,1,6.68,3.42,0.82,7.43,0.0,4.46,52,0,8,15,65,61,80,48,70,73,77,99,69,19,52
4,19069586,João Pedro Galvão,ST (C),31,184 cm,75 kg,-,GRE,Campeonato Brasileiro Série A Assaí,ITA (BRA),-,-,Scouting Required,"$64,000 p/w",$600K - $5.8M,-,Right,33,2336,6.8,5,61.47,4,9,0.35,259.56,8.62,0.33,0.14,0.38,0.01,8,0.31,0.13,292.0,7.86,0.3,0.01,0.13,0.14,0.01,2,0.08,5,0.19,467.2,3.03,0.12,1.97,0.08,0.0,14,0.54,11.65,0.45,2.35,0.09,13,0.5,10.89,0.42,2.11,0.08,15,60,2.31,34,1.31,57,0.73,585,22.54,500,19.26,85,0.73,19,3.25,35,1.35,5.98,16,0.62,2.74,12,0.46,0,0.0,0,12,0.46,0,0.0,0.0,3,0.13,3,0.12,177,6.82,34,1.31,19.21,1.96,14.33,-12.37,-86.32,0.89,23,1,1,100,30,1.16,22,0.85,8,73,0.31,0,0.0,1.03,28,1.08,5,0.19,0,0.0,254,9.79,115,4.43,45.28,139,5.36,54.72,1,0.04,9.79,48.24,52,29,-23,2.0,1.12,14,0.54,15,0.58,1.88,1.91,11.69,303.3,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,4,0.13,0.0,0.15,0.0,2,0,6.2,2.64,0.92,7.79,0.0,4.39,81,0,15,30,76,84,89,56,81,78,90,88,58,9,67
5,19348862,Kaio Jorge,ST (C),21,182 cm,72 kg,nEU,Frosinone,Serie A TIM,BRA,-,-,Scouting Required,"$31,500 p/w",$5.6M - $9M,-,Right,14,1099,6.46,2,68.69,5,1,0.08,1099.0,3.79,0.31,0.24,-2.79,-0.23,1,0.08,0.06,1099.0,3.03,0.25,-0.17,0.19,-2.03,-0.17,0,0.0,1,0.08,1099.0,0.51,0.04,0.49,0.04,0.0,2,0.16,4.3,0.35,-2.3,-0.19,2,0.16,3.54,0.29,-1.54,-0.13,6,16,1.31,7,0.57,44,0.49,212,17.36,179,14.66,84,0.66,8,3.77,6,0.49,2.83,6,0.49,2.83,5,0.41,0,0.0,0,5,0.41,0,0.0,0.0,12,1.0,1,0.08,121,9.91,34,2.78,28.1,3.52,7.21,-3.69,-51.18,0.9,11,1,0,0,24,1.97,14,1.15,10,58,0.82,0,0.0,1.41,9,0.74,1,0.08,1,0.08,122,9.99,19,1.56,15.57,103,8.43,84.43,0,0.0,9.99,22.6,19,17,-2,1.56,1.39,6,0.49,11,0.9,11.0,3.63,12.71,155.2,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,2,0.08,0.0,0.16,0.0,0,0,7.12,1.54,0.66,6.57,0.0,3.19,21,0,1,10,32,30,37,22,34,30,40,56,41,9,41
6,11022221,Vedat Muriqi,ST (C),29,194 cm,92 kg,-,Mallorca,LaLiga EA Sports,KOS (ALB),-,-,Scouting Required,"$82,000 p/w",$24M - $30M,-,Left,19,1341,6.88,0,70.58,6,7,0.47,191.57,5.39,0.36,0.2,1.61,0.11,5,0.34,0.19,268.2,3.87,0.26,0.08,0.14,1.13,0.08,0,0.0,0,0.0,0.0,0.49,0.03,-0.49,-0.03,0.0,7,0.47,5.88,0.39,1.12,0.08,5,0.34,4.36,0.29,0.64,0.04,26,27,1.81,14,0.94,52,0.13,415,27.85,329,22.08,79,0.4,6,1.45,6,0.4,1.45,6,0.4,1.45,1,0.07,0,0.0,0,1,0.07,0,0.0,0.0,2,0.13,2,0.13,139,9.33,36,2.42,25.9,6.04,9.19,-3.15,-34.28,0.74,11,2,2,100,37,2.48,22,1.48,15,59,1.01,0,0.0,2.64,16,1.07,3,0.2,0,0.0,217,14.56,117,7.85,53.92,100,6.71,46.08,1,0.07,14.56,54.72,31,14,-17,2.08,0.94,5,0.34,5,0.34,1.0,1.29,13.05,194.5,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,1,0,9.53,2.0,0.54,12.27,0.0,3.15,21,0,13,6,30,37,51,23,41,46,53,93,70,38,14
7,28018529,Matt Godden,ST (C),32,181 cm,77 kg,Lst,Coventry,Sky Bet Championship,ENG,Trained in nation (15-21),-,Scouting Required,"$8,750 p/w",$275K,$275K,Right,17,1435,6.55,6,62.39,7,3,0.19,478.33,6.49,0.41,0.22,-3.49,-0.22,1,0.06,0.03,1435.0,4.21,0.26,-0.2,0.14,-3.21,-0.2,0,0.0,0,0.0,0.0,1.04,0.07,-1.04,-0.07,0.0,3,0.19,7.53,0.47,-4.53,-0.28,1,0.06,5.25,0.33,-4.25,-0.27,10,30,1.88,14,0.88,47,0.56,388,24.33,327,20.51,84,0.5,8,2.06,14,0.88,3.61,14,0.88,3.61,16,1.0,2,0.13,12,16,1.0,2,0.13,12.5,2,0.13,2,0.13,173,10.85,42,2.63,24.28,4.26,7.34,-3.08,-41.96,1.0,16,3,2,67,25,1.57,19,1.19,6,76,0.38,0,0.0,0.96,9,0.56,3,0.19,2,0.13,192,12.04,44,2.76,22.92,148,9.28,77.08,2,0.13,12.04,29.03,16,26,10,1.0,1.63,13,0.82,22,1.38,22.0,5.23,12.68,202.1,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,8.15,1.97,1.13,8.63,0.0,4.89,17,0,16,12,34,42,48,31,45,40,54,65,67,19,25
8,63031763,Róbert Boženík,ST (C),24,188 cm,81 kg,-,Boavista,Liga Portugal Betclic,SVK,-,-,Scouting Required,"$3,300 p/w",Not for Sale,-,Right,16,1413,6.82,1,83.12,8,6,0.38,235.5,4.47,0.28,0.14,1.53,0.1,6,0.38,0.18,235.5,4.47,0.28,0.1,0.14,1.53,0.1,0,0.0,1,0.06,1413.0,0.45,0.03,0.55,0.04,0.0,7,0.45,4.92,0.31,2.08,0.13,7,0.45,4.92,0.31,2.08,0.13,18,33,2.1,17,1.08,52,0.51,347,22.1,285,18.15,82,0.19,3,0.86,6,0.38,1.73,6,0.38,1.73,7,0.45,1,0.06,14,7,0.45,1,0.06,14.29,0,0.0,0,0.0,93,5.92,21,1.34,22.58,4.84,6.31,-1.47,-23.3,0.7,11,0,0,0,24,1.53,12,0.76,12,50,0.76,0,0.0,2.18,20,1.27,3,0.19,1,0.06,213,13.57,87,5.54,40.85,126,8.03,59.15,0,0.0,13.57,41.77,20,11,-9,1.27,0.7,5,0.32,8,0.51,1.33,1.79,12.28,192.8,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,5.86,2.14,0.51,8.38,0.0,3.63,29,0,10,10,47,39,55,25,42,42,56,94,61,22,30
9,2000241121,João Paulo,ST (C),20,182 cm,73 kg,-,GOI,Campeonato Brasileiro Série A Assaí,BRA,-,-,Scouting Required,$650 p/w,$150K - $475K,-,Right,28,2133,6.76,8,59.25,9,8,0.34,266.63,7.78,0.33,0.12,0.22,0.01,8,0.34,0.12,266.63,7.78,0.33,0.01,0.12,0.22,0.01,0,0.0,2,0.08,1066.5,1.72,0.07,0.28,0.01,0.0,10,0.42,9.5,0.4,0.5,0.02,10,0.42,9.5,0.4,0.5,0.02,12,65,2.74,30,1.27,46,1.1,526,22.19,451,19.03,86,0.76,18,3.42,16,0.68,3.04,16,0.68,3.04,10,0.42,3,0.13,30,10,0.42,3,0.13,30.0,9,0.37,3,0.13,157,6.62,37,1.56,23.57,1.94,16.12,-14.18,-87.97,1.31,31,0,0,0,29,1.22,20,0.84,9,69,0.38,0,0.0,1.12,23,0.97,4,0.17,0,0.0,258,10.89,61,2.57,23.64,197,8.31,76.36,2,0.08,10.89,28.22,44,26,-18,1.86,1.1,13,0.55,14,0.59,1.75,1.8,11.1,263.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,4.89,2.96,1.01,6.76,0.0,5.27,49,0,10,30,74,62,70,53,58,55,66,87,60,6,44


In [63]:
# List the distinct values of the `position` column (one string per player,
# which may combine several roles, e.g. 'AM (C), ST (C)')
df_players['position'].unique()

array(['ST (C)', 'AM (C), ST (C)', 'AM (C)', 'AM (LC), ST (C)', 'AM (LC)',
       'AM (L), ST (C)', 'AM (L)', 'AM (RC), ST (C)', 'AM (RC)',
       'AM (RLC), ST (C)', 'AM (RLC)', 'AM (RL), ST (C)', 'AM (RL)',
       'AM (R), ST (C)', 'AM (R)', 'M/AM (C), ST (C)', 'M/AM (C)',
       'M/AM (LC), ST (C)', 'M/AM (LC)', 'M/AM (L), ST (C)', 'M/AM (L)',
       'M/AM (RC)', 'M/AM (RLC)', 'M/AM (RL), ST (C)', 'M/AM (RL)',
       'M/AM (R), ST (C)', 'M/AM (R)', 'M (C), AM (LC), ST (C)',
       'M (C), AM (LC)', 'M (C), AM (RC), ST (C)', 'M (C), AM (RC)',
       'M (C), AM (RLC), ST (C)', 'M (C), AM (RLC)', 'M (C), AM (RL)',
       'M (C), AM (R)', 'M (C)', 'M (LC), AM (C)', 'M (LC), AM (L)',
       'M (LC), AM (RLC), ST (C)', 'M (LC), AM (RLC)', 'M (LC)',
       'M (L), AM (LC), ST (C)', 'M (L), AM (LC)',
       'M (L), AM (RLC), ST (C)', 'M (L), AM (RLC)',
       'M (L), AM (RL), ST (C)', 'M (L), AM (RL)',
       'M (RC), AM (C), ST (C)', 'M (RC), AM (C)', 'M (RC), AM (RLC)',
       'M (RC)', '