# FM 24 Moneyball

In [101]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import re

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Show all columns in pandas
pd.set_option('display.max_columns', 500) 

%matplotlib inline

## Ingest and clean data

In [102]:
### Read raw data

def _normalize_column_names(columns):
    """Return the labels lower-cased and stripped, with spaces and hyphens turned into underscores."""
    return [x.lower().strip().replace(' ', '_').replace('-', '_') for x in columns]


# Select file path for raw data
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM 24 exports\Winter 2023\Center Backs.html"

# Ingest in a dataframe (read_html returns a list of tables; the first one is used)
df_players_raw = pd.read_html(path, encoding='utf-8')[0]

# Format column names 
df_players_raw.columns = _normalize_column_names(df_players_raw.columns)

# ---------------------------------------------------------------------------------------------------------------------

### Read processed Data lab data

# Select file path for processed data
path = r"C:\Users\kevmi\Documents\FM 24 Moneyball\FM Data Lab\Man Utd\FM Data Lab outputs\Winter 2023\Center Backs.csv"

# Ingest in a dataframe 
df_players = pd.read_csv(path)

# Format column names 
df_players.columns = _normalize_column_names(df_players.columns)

# Bring ID from raw data.
# The two sources share no key, so rows are matched on name/age/height/weight.
# NOTE(review): this is an inner join, so players without an exact match are dropped
# and duplicated key combinations would multiply rows; verify against the row counts.
id_merge_keys = ['name', 'age', 'height', 'weight']
df_players = df_players.merge(df_players_raw[id_merge_keys + ['uid']], on=id_merge_keys)

# Expose the raw 'uid' as the leading 'id' column. Inserting it explicitly avoids
# relying on reset_index() producing a 'level_0' column, which only happens when the
# processed file already carries an 'index' column.
df_players.insert(0, 'id', df_players.pop('uid'))

## Data cleaning

In [103]:
# Add a constant helper column 'ones' (value 1 on every row), then preview the first rows
df_players = df_players.assign(ones=1)
df_players.head()

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,passes_attempted/90,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_pass_rate,key_passes/90,key_pass_%,open_play_key_passes/90,open_play_key_pass_%,pressures_attempted,pressures_attempted/90,pressures_completed,pressures_completed/90,pressure_success_%,possession_won/90,possession_lost/90,poss+_/90,poss+__%,tackles_attempted/90,tackles_completed/90,tackle_completion_%,tackles_failed/90,key_tackles/90,tackle_quality,interceptions/90,blocks/90,shots_blocked/90,headers_attempted/90,headers_won/90,headers_won_%,headers_lost/90,headers_lost_%,key_headers/90,aerial_challenges_attempted/90,duels_win_%,fouls_won/90,fouls_committed/90,clearances/90,attacking_actions/90,creative_actions/90,defensive_actions/90,excitement_factor/90,general_performance,goalkeeping,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister,ones
0,168288,Robin Pröpper,D (C),30,190 cm,80 kg,-,FC Twente,Eredivisie,NED,-,-,Scouting Required,"$2,700 p/w",$850K - $8.4M,-,Right,17,1575,6.69,1,87.5,0,56.17,47.2,84,4.69,8.34,0.23,0.41,0.23,0.41,58,3.31,22,1.26,37.93,9.77,8.51,1.26,14.81,0.86,0.57,67,0.29,0.17,1.5,1.09,0.23,0.23,7.54,4.57,60.61,2.97,39.39,1.03,7.54,61.22,0.0,0.57,1.71,0.11,0.29,17.69,0.74,4,100,1,3,11,6,4,5,7,12,9,15,73,13,16,1
1,211232,Djéné Dakonam,"D (RC), DM",31,178 cm,71 kg,-,Getafe,LaLiga EA Sports,TOG (BEN),-,-,Scouting Required,"$20,500 p/w",$150K - $10.5M,-,Right,17,1496,6.56,0,88.0,1,43.38,38.08,88,4.03,9.29,0.48,1.11,0.48,1.11,136,8.18,41,2.47,30.15,11.37,6.98,4.39,62.89,3.73,3.07,82,0.66,0.24,2.07,1.93,0.78,0.42,3.79,2.35,61.9,1.44,38.1,0.24,3.79,72.0,0.24,1.8,1.56,0.59,0.78,16.0,2.29,44,100,73,52,44,30,27,42,54,42,43,20,5,23,67,1
2,11023165,Amir Rrahmani,D (C),29,192 cm,83 kg,nEU,Napoli,Serie A TIM,KOS (ALB),-,-,Scouting Required,"$66,000 p/w",$46M - $57M,-,Right,19,1552,6.85,0,81.68,2,64.19,57.7,90,6.55,10.21,0.23,0.36,0.23,0.36,51,2.96,21,1.22,41.18,12.12,6.96,5.16,74.14,1.45,0.93,64,0.52,0.0,2.27,1.62,0.46,0.23,7.94,5.86,73.72,2.09,26.28,0.64,7.94,72.22,0.17,0.64,1.1,0.55,0.23,14.85,0.93,32,100,3,35,44,64,79,54,68,62,75,84,64,11,32,1
3,12038706,Ramy Bensebaïni,"D (LC), WB (L)",28,187 cm,85 kg,-,Borussia Dortmund,Bundesliga,ALG,-,-,Scouting Required,"$125,000 p/w",$36M - $57M,-,Left,12,1058,7.04,8,52.9,3,59.8,53.17,89,6.21,10.38,1.45,2.42,1.19,1.99,88,7.49,31,2.64,35.23,14.8,8.93,5.87,65.73,3.23,2.47,76,0.77,0.09,2.0,2.3,0.94,0.34,8.51,6.98,82.0,1.53,18.0,1.19,8.51,80.43,1.02,1.62,0.51,2.33,2.21,24.11,4.76,96,100,69,99,99,96,97,98,99,99,98,90,94,96,96,1
4,12078947,Wilfried Singo,"D (RC), WB/M (R)",23,190 cm,79 kg,-,Monaco,Ligue 1 Uber Eats,CIV,-,-,Scouting Required,"$33,000 p/w",$26M - $34M,-,Right,15,1312,7.32,0,87.47,4,72.03,67.16,93,5.69,7.9,1.1,1.52,1.1,1.52,127,8.71,33,2.26,25.98,11.46,12.83,-1.37,-10.68,2.47,2.13,86,0.34,0.0,1.57,2.74,0.41,0.21,5.42,3.36,62.03,2.06,37.97,0.41,5.42,69.57,1.1,1.58,1.17,3.25,2.13,14.57,10.02,99,100,59,99,98,98,99,99,98,98,98,91,20,20,99,1


## Plot function

In [104]:
def scatter_plot(df, x_metric, y_metric, z_metric, 
                 title, show_color_bar=False, show_name_label=True): 
    """Display an interactive Plotly scatter of two metrics, colored by a third.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to plot. Must contain the `x_metric`, `y_metric` and `z_metric`
        columns plus 'name' and 'club', which are shown in the hover box.
    x_metric, y_metric : str
        Column names plotted on the x and y axes.
    z_metric : str
        Column name mapped to the marker color ('RdYlGn' continuous scale).
    title : str
        Figure title; the number of rows in `df` is appended in parentheses.
    show_color_bar : bool, default False
        Whether the color bar is displayed.
    show_name_label : bool, default True
        Whether each marker is labelled with the player's name.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Label markers with the 'name' column, or with nothing (None) when labels are off
    text_col = 'name' if show_name_label else None

    # Create plotly figure
    fig = px.scatter(
        df,
        x=x_metric,
        y=y_metric,
        color=z_metric,
        text=text_col,
        # Let plotly express attach name/club per row so the hover data stays aligned
        # with the points of each trace, even if the color column splits the data
        # into several traces.
        custom_data=['name', 'club'],
        color_continuous_scale='RdYlGn',
        labels={z_metric: f'Color Scale ({z_metric})'},
        width=1200, 
        height=700    
    )

    # Format traces
    fig.update_traces(
        marker=dict(size=10), 
        textposition='top center',
        textfont=dict(size=10),
        hovertemplate=(
            "<b>%{customdata[0]}</b><br>" +
            "<b>%{customdata[1]}</b><br><br>" +
            f"{x_metric}: " + "%{x}<br>" +
            f"{y_metric}: " + "%{y}<br>" +
            f"{z_metric}: " + "%{marker.color}<extra></extra>"
        )
    )

    fig.update_layout(
        title={
            # Row count with thousands separator and no stray leading space
            'text': f'{title} ({len(df):,})',
            'x': 0.5,
            'xanchor': 'center'
        },
        coloraxis_colorbar=dict(title=z_metric),
        coloraxis_showscale=show_color_bar,  # Show or hide the color bar
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(showgrid=True, gridcolor='lightgray', zeroline=False),
        yaxis=dict(showgrid=True, gridcolor='lightgray', zeroline=False)
    )

    fig.show()


## Passing analysis

### Full universe

In [105]:
# Initialize list of targets with every player id.
# Stored as a plain list so it has the same type as the shortlists saved by the later filter cells.
targets = list(df_players['id'])

# Passing profile of the whole player pool (no filters applied yet)
scatter_plot(df_players,
             x_metric='passes_attempted/90', 
             y_metric='progressive_pass_rate',
             z_metric='pass_completion_%',
             title='Passing quality', 
             show_name_label=False
)

In [106]:
# Passing shortlist: keep current targets aged 26 or under who attempt at least
# 50 passes per 90, with a progressive pass rate of at least 7 and at least 89% completion
mask = (
    df_players['passes_attempted/90'].ge(50)
    & df_players['progressive_pass_rate'].ge(7)
    & df_players['pass_completion_%'].ge(89)
    & df_players['age'].le(26)
    & df_players['id'].isin(targets)
)

df_targets = df_players.loc[mask]

scatter_plot(
    df_targets,
    title='Passing quality',
    x_metric='passes_attempted/90',
    y_metric='progressive_pass_rate',
    z_metric='pass_completion_%',
    show_name_label=False,
)

# Carry forward only the ids that passed this stage
targets = df_targets['id'].tolist()

## Possession Analysis

In [107]:
# Possession profile of the current shortlist (df_targets), i.e. the players
# that survived the previous filter, not the full player pool
scatter_plot(df_targets,
             x_metric='possession_lost/90', 
             y_metric='possession_won/90',
             z_metric='poss+_/90',
             title='Possession quality',  # fixed misspelled title ("Possesion")
             show_name_label=False
)

In [108]:
# Possession shortlist: keep current targets aged 26 or under who lose the ball
# at most 9 times per 90, win it at least 9 times per 90, and have a non-negative
# possession balance (poss+_/90)
mask = (
    (df_players['possession_lost/90'] <= 9)
    & (df_players['possession_won/90'] >= 9)
    & (df_players['poss+_/90'] >= 0)
    & (df_players['age'] <= 26)
    & (df_players['id'].isin(targets))
)

df_targets = df_players[mask]

scatter_plot(df_targets,
             x_metric='possession_lost/90', 
             y_metric='possession_won/90',
             z_metric='poss+_/90',
             title='Possession quality',  # fixed misspelled title ("Possesion")
             show_name_label=False
)

# Keep only players that fulfill criteria
targets = list(df_targets['id'])

## Aerial/Ground Dominance

In [117]:
# Remove the Levi Colwill outlier (id 28124573) from the shortlist frame.
# NOTE(review): presumably excluded because of his 0.0 duels_win_% - confirm.
# Only df_targets is changed here; `targets` still contains this id.
df_targets = df_targets.loc[df_targets['id'].ne(28124573)]

In [116]:
# Tackle vs. header success for the rows currently in df_targets, colored by duels won %
scatter_plot(
    df_targets,
    title='Dominance',
    x_metric='tackle_completion_%',
    y_metric='headers_won_%',
    z_metric='duels_win_%',
    show_name_label=False,
)

In [121]:
# Dominance shortlist: keep current targets aged 26 or under with at least 70%
# tackle completion, at least 70% headers won and at least 65% duels won
mask = (
    df_players['tackle_completion_%'].ge(70)
    & df_players['headers_won_%'].ge(70)
    & df_players['duels_win_%'].ge(65)
    & df_players['age'].le(26)
    & df_players['id'].isin(targets)
)

df_targets = df_players.loc[mask]

scatter_plot(
    df_targets,
    title='Dominance',
    x_metric='tackle_completion_%',
    y_metric='headers_won_%',
    z_metric='duels_win_%',
    show_name_label=False,
)

# The shortlist update is disabled for this stage, so `targets` is left unchanged:
#targets = df_targets['id']

In [84]:
# Show the shortlist rows whose name contains "Levi"
df_targets.loc[lambda frame: frame['name'].str.contains('Levi')]

Unnamed: 0,id,name,position,age,height,weight,inf,club,division,nationality,home_grown,personality,media_handling,wage,transfer_value,asking_price,preferred_foot,starts,minutes_played,average_rating,sub_appearances,minutes/game,index,passes_attempted/90,passes_completed/90,pass_completion_%,progressive_passes/90,progressive_pass_rate,key_passes/90,key_pass_%,open_play_key_passes/90,open_play_key_pass_%,pressures_attempted,pressures_attempted/90,pressures_completed,pressures_completed/90,pressure_success_%,possession_won/90,possession_lost/90,poss+_/90,poss+__%,tackles_attempted/90,tackles_completed/90,tackle_completion_%,tackles_failed/90,key_tackles/90,tackle_quality,interceptions/90,blocks/90,shots_blocked/90,headers_attempted/90,headers_won/90,headers_won_%,headers_lost/90,headers_lost_%,key_headers/90,aerial_challenges_attempted/90,duels_win_%,fouls_won/90,fouls_committed/90,clearances/90,attacking_actions/90,creative_actions/90,defensive_actions/90,excitement_factor/90,general_performance,goalkeeping,defensive_defender,creative_defender,attacking_defender,creative_midfielder,attacking_midfielder,creative_winger,attacking_winger,creative_forward,attacking_forward,finisher,aerial_threat,reader,assister,ones
266,28124573,Levi Colwill,D (LC),20,187 cm,88 kg,-,Chelsea,Premier League,ENG,Trained in nation (15-21),-,Scouting Required,"$94,000 p/w",$153M - $189M,-,Left,13,1173,6.95,1,83.79,266,81.02,76.42,94,6.45,7.95,0.08,0.09,0.08,0.09,31,2.38,9,0.69,29.03,13.27,5.29,7.98,150.85,0.69,0.54,78,0.15,0.08,3.0,2.3,0.38,0.31,6.91,5.37,77.78,1.53,22.22,0.77,6.91,0.0,0.23,0.23,1.07,0.29,0.08,15.81,0.92,37,100,27,35,27,44,56,38,22,17,36,44,62,40,0,1
