# FM 24 Moneyball

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import re

# Ignore warnings
# NOTE: with no category/module filter this silences every warning for the
# rest of the kernel session, including pandas deprecation notices.
import warnings
warnings.filterwarnings('ignore')

# Show all columns in pandas
# (frames are displayed in full up to 500 columns wide before being truncated)
pd.set_option('display.max_columns', 500) 

# IPython magic: render matplotlib figures inline in the cell output
%matplotlib inline

## Ingest and clean data

In [2]:
### Read raw data

# Select file path for raw data
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM 24 exports\Winter 2023\Center Backs.html"

# Ingest in a dataframe (read_html returns a list of tables; the first one is used)
df_players_raw = pd.read_html(path, encoding='utf-8')[0]

# Format column names: lower-case, trimmed, spaces and hyphens replaced by underscores
df_players_raw.columns = [x.lower().strip().replace(' ', '_').replace('-', '_') for x in df_players_raw.columns]

# ---------------------------------------------------------------------------------------------------------------------

### Read processed Data lab data

# Select file path for processed data
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM Data Lab outputs\Winter 2023\Center Backs.csv"

# Ingest in a dataframe
df_players = pd.read_csv(path)

# Format column names (same normalisation as the raw export so the merge keys line up)
df_players.columns = [x.lower().strip().replace(' ', '_').replace('-', '_') for x in df_players.columns]

# Bring ID from raw data.
# The processed file is matched to the raw export on these four columns.
# This is an inner join (pandas default): processed players with no exact match
# in the raw export are dropped, and a key combination that appears more than
# once in the raw export yields one output row per match.
merge_keys = ['name', 'age', 'height', 'weight']
df_players = df_players.merge(df_players_raw[merge_keys + ['uid']], on=merge_keys)

# Expose the raw export's uid as the leading 'id' column.
# This is done explicitly rather than via reset_index(), whose generated column
# name ('index' vs 'level_0') depends on whether the CSV already has an 'index' column.
df_players = df_players.drop(columns='id', errors='ignore')  # replace any pre-existing id
df_players.insert(0, 'id', df_players.pop('uid'))

## Data cleaning

In [3]:
# Preview the first rows of the merged player table (sanity check on columns and the id merge)
df_players.head()

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,passes_attempted/90,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_pass_rate,key_passes/90,key_pass_%,open_play_key_passes/90,open_play_key_pass_%,tackles_attempted/90,tackles_completed/90,tackle_completion_%,tackles_failed/90,key_tackles/90,tackle_quality,interceptions/90,blocks/90,shots_blocked/90,headers_attempted/90,headers_won/90,headers_won_%,headers_lost/90,headers_lost_%,key_headers/90,aerial_challenges_attempted/90,duels_win_%,fouls_won/90,fouls_committed/90,clearances/90,attacking_actions/90,creative_actions/90,defensive_actions/90,excitement_factor/90,general_performance,goalkeeping,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister
0,168288,Robin Pröpper,D (C),30,190 cm,80 kg,-,FC Twente,Eredivisie,NED,-,-,Scouting Required,"$2,700 p/w",$850K - $8.4M,-,Right,17,1575,6.69,1,87.5,0,56.17,47.2,84,4.69,8.34,0.23,0.41,0.23,0.41,0.86,0.57,67,0.29,0.17,1.5,1.09,0.23,0.23,7.54,4.57,60.61,2.97,39.39,1.03,7.54,61.22,0.0,0.57,1.71,0.11,0.29,17.69,0.74,4,100,1,3,11,6,4,5,7,12,9,15,73,13,16
1,211232,Djéné Dakonam,"D (RC), DM",31,178 cm,71 kg,-,Getafe,LaLiga EA Sports,TOG (BEN),-,-,Scouting Required,"$20,500 p/w",$150K - $10.5M,-,Right,17,1496,6.56,0,88.0,1,43.38,38.08,88,4.03,9.29,0.48,1.11,0.48,1.11,3.73,3.07,82,0.66,0.24,2.07,1.93,0.78,0.42,3.79,2.35,61.9,1.44,38.1,0.24,3.79,72.0,0.24,1.8,1.56,0.59,0.78,16.0,2.29,44,100,73,52,44,30,27,42,54,42,43,20,5,23,67
2,11023165,Amir Rrahmani,D (C),29,192 cm,83 kg,nEU,Napoli,Serie A TIM,KOS (ALB),-,-,Scouting Required,"$66,000 p/w",$46M - $57M,-,Right,19,1552,6.85,0,81.68,2,64.19,57.7,90,6.55,10.21,0.23,0.36,0.23,0.36,1.45,0.93,64,0.52,0.0,2.27,1.62,0.46,0.23,7.94,5.86,73.72,2.09,26.28,0.64,7.94,72.22,0.17,0.64,1.1,0.55,0.23,14.85,0.93,32,100,3,35,44,64,79,54,68,62,75,84,64,11,32
3,12038706,Ramy Bensebaïni,"D (LC), WB (L)",28,187 cm,85 kg,-,Borussia Dortmund,Bundesliga,ALG,-,-,Scouting Required,"$125,000 p/w",$36M - $57M,-,Left,12,1058,7.04,8,52.9,3,59.8,53.17,89,6.21,10.38,1.45,2.42,1.19,1.99,3.23,2.47,76,0.77,0.09,2.0,2.3,0.94,0.34,8.51,6.98,82.0,1.53,18.0,1.19,8.51,80.43,1.02,1.62,0.51,2.33,2.21,24.11,4.76,96,100,69,99,99,96,97,98,99,99,98,90,94,96,96
4,12078947,Wilfried Singo,"D (RC), WB/M (R)",23,190 cm,79 kg,-,Monaco,Ligue 1 Uber Eats,CIV,-,-,Scouting Required,"$33,000 p/w",$26M - $34M,-,Right,15,1312,7.32,0,87.47,4,72.03,67.16,93,5.69,7.9,1.1,1.52,1.1,1.52,2.47,2.13,86,0.34,0.0,1.57,2.74,0.41,0.21,5.42,3.36,62.03,2.06,37.97,0.41,5.42,69.57,1.1,1.58,1.17,3.25,2.13,14.57,10.02,99,100,59,99,98,98,99,99,98,98,98,91,20,20,99


In [26]:
def scatter_plot(df, x_metric, y_metric, z_metric,
                 title, show_color_bar=False):
    """Show an interactive, labelled plotly scatter plot.

    Every row of ``df`` is drawn as one marker, annotated with the value of
    its ``name`` column and coloured by ``z_metric`` using the ``RdYlGn``
    continuous colour scale. The figure is displayed with ``fig.show()``.

    Parameters
    ----------
    df : data accepted by ``plotly.express.scatter``
        Must provide a ``name`` column plus the three metric columns.
    x_metric, y_metric : str
        Columns plotted on the x and y axes.
    z_metric : str
        Column mapped to the marker colour.
    title : str
        Figure title; the number of rows in ``df`` is appended in parentheses.
    show_color_bar : bool, default False
        Whether to display the colour bar next to the plot.

    Returns
    -------
    None
    """
    # Create plotly figure
    fig = px.scatter(
        df,
        x=x_metric,
        y=y_metric,
        color=z_metric,
        text='name',
        color_continuous_scale='RdYlGn',
        labels={z_metric: f'Color Scale ({z_metric})'},
        title=title,
        width=1200,
        height=700
    )

    # Format traces: fixed marker size, names above the markers, custom hover box
    fig.update_traces(marker=dict(size=10),
                    textposition='top center',
                    textfont=dict(size=10),
                    hovertemplate=(
                        "<b>%{text}</b><br><br>" +
                        f"{x_metric}: " + "%{x}<br>" +
                        f"{y_metric}: " + "%{y}<br>" +
                        f"{z_metric}: " + "%{marker.color}<extra></extra>"
                    )
    )

    fig.update_layout(
        title={
            # Row count with thousands separators; no sign/padding flag, so it
            # renders as "(12)" rather than "( 12)".
            'text': f'{title} ({len(df):,})',
            'x': 0.5,
            'xanchor': 'center'
        },
        coloraxis_colorbar=dict(title=z_metric),
        coloraxis_showscale=show_color_bar,  # Shows the color bar only when requested
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(showgrid=True, gridcolor='lightgray', zeroline=False),
        yaxis=dict(showgrid=True, gridcolor='lightgray', zeroline=False)
    )

    fig.show()


In [34]:
# Shortlist criteria applied before plotting: pass completion of at least 90%,
# at least 4 progressive passes per 90, aged 25 or under, and at least 60% of
# headers won.
mask = (
    df_players['pass_completion_%'].ge(90)
    & df_players['progressive_passes/90'].ge(4)
    & df_players['age'].le(25)
    & df_players['headers_won_%'].ge(60)
)

# Keep only the rows that satisfy every criterion
df_plot = df_players.loc[mask]

# Passing volume vs. progression, coloured by aerial success
scatter_plot(
    df_plot,
    title='Passing quality & Aerial dominance',
    x_metric='passes_attempted/90',
    y_metric='progressive_passes/90',
    z_metric='headers_won_%',
)

In [45]:
# Full or partial player names to look up (matching is case-sensitive)
targets = [
    'Tobias Slotsager',
    'Isaak Touré',
    'Hrvoj',
    'Varane'
]

# One alternation pattern for all targets; escape in case names have special characters
pattern = '|'.join(re.escape(name) for name in targets)

# na=False treats rows with a missing name as non-matches. Without it the mask
# would contain NaN for those rows and boolean indexing would raise.
df_players[df_players['name'].str.contains(pattern, regex=True, na=False)]

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,passes_attempted/90,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_pass_rate,key_passes/90,key_pass_%,open_play_key_passes/90,open_play_key_pass_%,tackles_attempted/90,tackles_completed/90,tackle_completion_%,tackles_failed/90,key_tackles/90,tackle_quality,interceptions/90,blocks/90,shots_blocked/90,headers_attempted/90,headers_won/90,headers_won_%,headers_lost/90,headers_lost_%,key_headers/90,aerial_challenges_attempted/90,duels_win_%,fouls_won/90,fouls_committed/90,clearances/90,attacking_actions/90,creative_actions/90,defensive_actions/90,excitement_factor/90,general_performance,goalkeeping,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister
192,24057195,Hrvoje Smolčić,D (LC),23,183 cm,80 kg,Wnt,Eintracht Frankfurt,Bundesliga,CRO,-,-,Scouting Required,"$25,500 p/w",$400K - $3.9M,-,Left,12,1032,6.96,2,73.71,192,70.81,65.41,92,5.58,7.88,0.17,0.25,0.17,0.25,1.48,1.05,71,0.44,0.17,3.4,1.4,0.7,0.35,7.15,5.58,78.05,1.57,21.95,0.87,7.15,76.77,0.09,0.44,0.44,0.65,0.35,17.37,1.31,71,100,13,54,78,69,80,67,65,68,69,78,72,25,74
546,85075627,Raphaël Varane,D (C),30,191 cm,81 kg,Wnt,Man Utd,Premier League,FRA (MTQ),-,Light-Hearted,Unflappable,"$425,000 p/w",$42M,-,Right,21,1934,7.0,2,84.09,546,58.96,54.07,92,3.35,5.68,0.09,0.16,0.09,0.16,0.84,0.56,67,0.28,0.19,1.8,1.72,0.7,0.42,7.72,6.28,81.33,1.44,18.67,0.98,7.72,79.89,0.19,0.47,1.77,0.68,0.14,19.51,1.16,46,100,46,6,44,61,76,48,46,51,63,94,85,55,0
628,2000006106,Isaak Touré,D (C),20,204 cm,99 kg,-,Lorient,Ligue 1 Uber Eats,FRA (CIV),-,-,Scouting Required,"$6,000 p/w",$300K - $10.5M,-,Left,17,1457,7.19,0,85.71,628,59.18,53.18,90,5.0,8.46,0.49,0.84,0.43,0.73,2.47,2.16,88,0.31,0.43,8.0,2.53,0.37,0.19,6.98,6.3,90.27,0.68,9.73,1.24,6.98,89.54,0.19,0.31,1.24,0.79,0.56,25.8,1.67,85,100,96,55,68,88,91,67,79,79,91,93,88,98,67
677,2000178532,Tobias Slotsager,D (RC),17,190 cm,82 kg,-,OB,3F Superliga,DEN,-,-,Scouting Required,"$3,800 p/w",$230K - $11.5M,-,Right,15,1344,6.95,1,84.0,677,80.22,75.0,93,5.02,6.26,0.27,0.33,0.27,0.33,0.87,0.6,69,0.27,0.33,3.25,1.67,0.74,0.6,7.43,6.29,84.68,1.14,15.32,1.07,7.43,83.06,0.07,0.27,0.94,0.4,0.33,21.39,0.67,45,100,45,25,34,57,68,39,49,57,61,73,87,80,16
