# FM 24 Moneyball

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import re

# Import created functions
# The project folder is appended to sys.path so money_ball_util can be imported from it.
import sys
sys.path.append(r'C:\Users\kevmi\Documents\FM 24 Moneyball\FM-24-Moneyball')

# NOTE(review): the wildcard import hides which names come from money_ball_util;
# scatter_plot and parse_transfer_value used further down are presumably defined there - confirm.
from money_ball_util import *

# Ignore warnings
# This silences every warning category for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# Show all columns in pandas
# Raises the display limit so frames with up to 500 columns are shown in full.
pd.set_option('display.max_columns', 500) 

# IPython magic (not plain Python): selects matplotlib's inline backend.
%matplotlib inline

## Ingest and clean data

In [2]:
### Read raw data


def _normalise_column_names(columns):
    """Return the labels lower-cased and stripped, with spaces and hyphens turned into underscores."""
    return [x.lower().strip().replace(' ', '_').replace('-', '_') for x in columns]


# Select file path for raw data
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM 24 exports\Summer 2024\Defensive Midfielders.html"

# Ingest in a dataframe (read_html returns a list of tables; the first one is used)
df_players_raw = pd.read_html(path, encoding='utf-8')[0]

# Format column names
df_players_raw.columns = _normalise_column_names(df_players_raw.columns)

# ---------------------------------------------------------------------------------------------------------------------

### Read processed Data lab data

# Select file path for processed data
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM Data Lab outputs\Summer 2024\Defensive Midfielders SL.csv"

# Ingest in a dataframe
df_players = pd.read_csv(path)

# Format column names
df_players.columns = _normalise_column_names(df_players.columns)

# Bring ID from raw data.
# The processed file carries no unique id, so players are matched on these four columns.
# NOTE(review): this is an inner join - processed rows without an exact match in the raw
# export are dropped, and duplicated key combinations in the raw export would duplicate rows.
merge_keys = ['name', 'age', 'height', 'weight']
df_players = df_players.merge(df_players_raw[merge_keys + ['uid']], on=merge_keys)

# Move `uid` to the front as `id`. Doing this explicitly avoids relying on
# reset_index() naming its new column `level_0`, which only happens when the
# frame already contains a column called `index`.
df_players.insert(0, 'id', df_players.pop('uid'))

## Data cleaning

In [3]:
# Preview the first five rows of the merged player table
df_players.head(n=5)

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,entries,leaguemultiplier,passes_attempted/90,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_pass_rate,key_passes/90,key_pass_%,open_play_key_passes/90,open_play_key_pass_%,chances_created/90,clear_cut_chances_created/90,clear_cut_chances_%,pressures_attempted/90,pressures_completed/90,pressure_success_%,possession_won/90,possession_lost/90,poss+_/90,poss+__%,tackles_attempted/90,tackles_completed/90,tackle_completion_%,tackles_failed/90,key_tackles/90,key_tackles_%,tackle_quality,interceptions/90,blocks/90,shots_blocked/90,headers_attempted/90,headers_won/90,headers_won_%,headers_lost/90,headers_lost_%,key_headers/90,key_headers_%,aerial_challenges_attempted/90,duels_win_%,fouls_committed/90,clearances/90,distance_covered/90,mistakes_leading_to_goal,sprints/90,attacking_actions/90,creative_actions/90,defensive_actions/90,excitement_factor/90,general_performance,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister
0,37071145,Mats Wieffer,"D (C), DM, M (C)",24,188 cm,80 kg,-,Feyenoord,Eredivisie,NED,-,Fairly Professional,Level-headed,"£16,500 p/w",£71M - £82M,-,Right,45,3457,7.21,0,76.82,1,1,False,64.41,57.48,89,6.38,9.9,1.67,2.59,1.56,2.43,0.39,0.39,100,10.21,2.71,26.53,9.35,9.55,-0.2,-2.09,2.47,2.0,81,0.47,0.05,2.6,3.52,1.77,0.23,0.1,5.73,3.41,59.55,2.32,40.45,0.47,13.74,5.73,66.03,0.7,0.29,13.73,0,14.4,3.42,2.37,13.87,7.03,88,12,71,88,82,82,82,88,82,88,88,88,53,82
1,2000208439,Gabriel Moscardo,DM,18,185 cm,73 kg,-,COR,Campeonato Brasileiro Série A Assaí,BRA,-,Fairly Determined,Level-headed,"£6,500 p/w",£15.5M - £19M,-,Right,13,1189,7.14,8,56.62,2,1,False,73.2,67.22,92,6.21,8.48,1.36,1.86,1.36,1.86,0.45,0.45,100,9.46,2.42,25.6,8.4,7.27,1.13,15.54,3.1,2.2,71,0.91,0.08,3.45,6.83,1.59,0.45,0.15,3.94,2.5,63.46,1.44,36.54,0.53,21.21,3.94,66.67,0.45,0.83,13.45,0,9.16,2.15,2.27,14.19,4.24,65,59,53,65,65,65,59,59,65,59,82,65,71,53
2,91187556,Leandro Barreiro,"DM, M (C)",24,174 cm,65 kg,Int,Mainz 05,Bundesliga,LUX (POR),-,Spirited,Evasive,"£20,500 p/w",Not for Sale,-,Right,31,2627,6.75,0,84.74,3,1,False,44.88,39.88,89,3.46,7.71,0.62,1.37,0.62,1.37,0.21,0.21,100,11.85,2.57,21.68,9.66,7.43,2.23,30.01,3.87,2.67,69,1.2,0.1,3.85,2.05,2.19,0.41,0.27,3.46,1.16,33.66,2.3,66.34,0.1,8.82,3.46,52.34,1.88,1.27,13.9,1,15.79,1.22,0.86,11.76,2.91,6,35,12,0,6,6,12,12,12,6,18,29,29,6
3,16147660,Xaver Schlager,"DM, M (C)",26,174 cm,76 kg,Int,RB Leipzig,Bundesliga,AUT,-,Resolute,Level-headed,"£78,000 p/w",£20M,£26.5M,Left,37,3060,6.88,0,82.7,4,1,False,49.91,45.53,91,6.38,12.79,1.79,3.59,1.26,2.53,0.47,0.47,100,9.97,2.82,28.32,7.59,6.91,0.68,9.84,3.18,2.29,72,0.88,0.03,1.28,2.3,1.53,0.12,0.03,2.47,0.91,36.9,1.56,63.1,0.06,6.45,2.47,56.77,1.38,0.56,13.9,0,15.32,1.88,2.09,8.73,4.32,59,0,65,53,53,47,65,47,47,47,29,6,6,59
4,77062603,Jesús Castillo,"DM, M (C)",22,185 cm,80 kg,-,Gil Vicente,Liga Portugal Betclic,PER,-,Balanced,Media-friendly,"£1,800 p/w",£140K - £1.4M,-,Right Only,30,2512,6.96,4,73.88,6,1,False,54.32,48.91,90,5.52,10.16,1.15,2.11,1.11,2.04,0.29,0.29,100,10.03,2.94,29.29,9.92,7.24,2.68,37.02,2.79,1.93,69,0.86,0.04,1.85,2.79,2.11,0.57,0.36,4.59,2.58,56.25,2.01,43.75,0.75,29.17,4.59,61.17,1.0,1.11,13.4,1,11.21,2.06,1.68,16.9,4.37,41,41,47,59,41,53,53,41,53,41,47,82,100,65


In [4]:
# # Keep only natural DMs
# path = 'data/Natural DMs.html'

# # Ingest in a dataframe
# df_dms = pd.read_html(path, encoding='utf-8')[0]

# # Format column names 
# df_dms.columns = [x.lower().strip().replace(' ', '_').replace('-', '_') for x in df_dms.columns]

# df_dms['natural_dm'] = 'Yes'

# df_dms = df_dms[['uid', 'natural_dm']]

# df_players = df_players.merge(df_dms, 
#                               left_on='id',
#                               right_on='uid',
#                               how='left')

# df_players = df_players[df_players['natural_dm'] == 'Yes']

# Price estimate: run parse_transfer_value over the `transfer_value` text and
# divide the result by one million (hence the `_M` suffix).
df_players['price_estimate_M'] = (
    df_players['transfer_value']
    .apply(parse_transfer_value)
    .div(1000000)
)

## Passing analysis

### Full universe

In [5]:
# Initialize list of targets: every player starts on the shortlist.
# Stored as a plain list so it has the same type as the `targets`
# produced by the filter cells that follow.
targets = list(df_players['id'])

# Passing profile of the full universe of players
scatter_plot(df_players,
             x_metric='passes_attempted/90', 
             y_metric='progressive_pass_rate',
             z_metric='pass_completion_%',
             title='Passing quality', 
             show_name_label=True
)

In [None]:
# Passing filter.
# The passing thresholds below are currently disabled, so the only active
# condition is membership of the running target list:
#   (df_players['passes_attempted/90'] >= 45)
#   (df_players['progressive_pass_rate'] >= 7)
#   (df_players['pass_completion_%'] >= 85)
#   (df_players['age'] <= 26)
mask = df_players['id'].isin(targets)

df_targets = df_players.loc[mask]

# Passing profile of the players that passed the filter
scatter_plot(
    df_targets,
    x_metric='passes_attempted/90',
    y_metric='progressive_pass_rate',
    z_metric='pass_completion_%',
    title='Passing quality',
    show_name_label=True,
)

# Carry the surviving player ids forward to the next filter
targets = list(df_targets['id'])

## Possession Analysis

In [None]:
# Possession profile of the current shortlist (df_targets as left by the
# previous filter cell, not the full df_players universe).
scatter_plot(df_targets,
             x_metric='possession_lost/90', 
             y_metric='possession_won/90',
             z_metric='poss+_/90',
             title='Possession quality',  # fixed typo: was 'Possesion quality'
             show_name_label=True
)

In [None]:
# Filter data: possession thresholds, an age cap, and membership of the
# running target list. Inside parentheses no line-continuation backslashes
# are needed.
mask = (
        (df_players['possession_lost/90'] <= 8)
        & (df_players['possession_won/90'] >= 7.5)
        & (df_players['poss+_/90'] >= 0)
        & (df_players['age'] <= 26)
        & (df_players['id'].isin(targets))
)

df_targets = df_players[mask]

scatter_plot(df_targets,
             x_metric='possession_lost/90', 
             y_metric='possession_won/90',
             z_metric='poss+_/90',
             title='Possession quality',  # fixed typo: was 'Possesion quality'
             show_name_label=False
)

# Keep only players that fulfill criteria
targets = list(df_targets['id'])

## Aggression vs Control
Identify aggressive and foul-prone targets

In [None]:
# Tackling vs fouling profile of the current shortlist (df_targets as left by
# the previous filter cell, not the full df_players universe).
scatter_plot(df_targets,
             x_metric='tackles_completed/90', 
             y_metric='fouls_committed/90',
             z_metric='tackle_completion_%',
             title='Aggression - Control',  # fixed typo: was 'Aggresion - Control'
             show_name_label=True
)

In [None]:
# Compute tackles/fouls ratio: completed tackles per foul committed.
# NOTE(review): despite the '-' in the column name this is a ratio, not a difference.
# For numeric columns, a player with zero fouls yields inf (or NaN when tackles
# are zero too) rather than an error.
df_players['tackles_comp-fouls/90'] = df_players['tackles_completed/90'] / df_players['fouls_committed/90']

# Filter data.
# The tackling thresholds below are currently disabled, so the only active
# condition is membership of the running target list:
#   (df_players['tackles_comp-fouls/90'] <= 2)
#   (df_players['tackles_completed/90'] >= 1.5)
#   (df_players['tackle_completion_%'] >= 65)
#   (df_players['price_estimate_M'] <= 40)
mask = (
        (df_players['id'].isin(targets))
)

df_targets = df_players[mask]

scatter_plot(df_targets,
             x_metric='tackles_completed/90', 
             y_metric='fouls_committed/90',
             z_metric='tackle_completion_%',
             title='Aggression - Control',  # fixed typo: was 'Aggresion - Control'
             show_name_label=False
)

# Keep only players that fulfill criteria (as a list, like the earlier filter cells)
targets = list(df_targets['id'])

## Pressure quality

In [None]:
# Pressing profile of the players currently held in df_targets
scatter_plot(
    df_targets,
    x_metric='pressures_completed/90',
    y_metric='pressure_success_%',
    z_metric='duels_win_%',
    title='Pressure quality',
    show_name_label=True,
)

In [None]:
# Filter data.
# A player is kept when they meet every pressing threshold AND are still on
# the running target list ...
meets_pressure_criteria = (
        (df_players['pressures_completed/90'] >= 2.0)
        & (df_players['pressure_success_%'] >= 20)
        & (df_players['duels_win_%'] >= 50)
        & (df_players['id'].isin(targets))
)

# ... OR when their name contains 'Barreiro', regardless of every criterion.
# This is how the original unparenthesised `a & b & c & d | e` evaluated,
# because `&` binds tighter than `|`; the grouping is now explicit.
# NOTE(review): confirm the override is meant to bypass the earlier filters too.
# regex=False matches the text literally; na=False treats missing names as no match.
is_forced_include = df_players['name'].str.contains('Barreiro', regex=False, na=False)

mask = meets_pressure_criteria | is_forced_include

df_targets = df_players[mask]

scatter_plot(df_targets,
             x_metric='pressures_completed/90', 
             y_metric='pressure_success_%',
             z_metric='duels_win_%',
             title='Pressure quality', 
             show_name_label=True
)

# Keep only players that fulfill criteria (as a list, like the earlier filter cells)
targets = list(df_targets['id'])

## Presence

In [None]:
# Defensive presence of the players currently held in df_targets
scatter_plot(
    df_targets,
    x_metric='interceptions/90',
    y_metric='blocks/90',
    z_metric='clearances/90',
    title='Presence',
    show_name_label=True,
)

In [None]:
# Show the row(s) whose name is exactly 'Casemiro'
df_players.loc[df_players['name'] == 'Casemiro']

In [None]:
# Filter data: presence thresholds plus membership of the running target list.
mask = (
        (df_players['interceptions/90'] >= 1.4)
        & (df_players['blocks/90'] >= 0.25)
        & (df_players['clearances/90'] >= 0.5)
        & (df_players['id'].isin(targets))
)

df_targets = df_players[mask]

scatter_plot(df_targets,
             x_metric='interceptions/90', 
             y_metric='blocks/90',
             z_metric='clearances/90',
             title='Presence', 
             show_name_label=True
)

# Keep only players that fulfill criteria.
# Every other filter cell refreshes `targets`; without this line any later
# `isin(targets)` filter would silently ignore the presence thresholds.
targets = list(df_targets['id'])

## Distance

In [None]:
# Work-rate profile of the players currently held in df_targets
scatter_plot(
    df_targets,
    x_metric='distance_covered/90',
    y_metric='defensive_actions/90',
    z_metric='creative_actions/90',
    title='Distance',
    show_name_label=True,
)

In [None]:
# Number of shortlisted players (non-null ids) in each division
df_targets['id'].groupby(df_targets['division']).count()

In [None]:
# Shortlisted 'Sky Bet Championship' players, youngest first.
# Boolean selection returns an empty frame when no such player is left on the
# shortlist, whereas groupby('division').get_group(...) raises KeyError for a
# missing group.
df_targets[df_targets['division'] == 'Sky Bet Championship'].sort_values('age')