# Player rating model

In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the per-player statistics CSV; the path is relative to the notebook's
# working directory.
players_csv = '../datasets/players_data_light-2024_2025.csv'
df = pd.read_csv(players_csv)

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
# Columns fed to the regressor for forwards.
features = ['Gls', 'xG', 'Ast', 'xAG', 'KP', 'PrgC', 'Touches', 'Carries', 'PrgR']

# Keep only rows whose position is exactly 'FW', work on an independent copy,
# and discard any row missing one of the feature columns.
is_forward = df['Pos'] == 'FW'
fw_df = df.loc[is_forward].copy().dropna(subset=features)

X = fw_df[features]

# Synthetic target: a fixed-weight blend of goals, expected goals, assists,
# expected assisted goals and key passes. The terms are accumulated left to
# right in the order listed below.
# NOTE(review): every column in this blend is also in `features`, so the
# regressor is asked to recover a formula of its own inputs -- confirm this
# is intended.
target_weights = {'Gls': 0.4, 'xG': 0.3, 'Ast': 0.1, 'xAG': 0.1, 'KP': 0.1}
weighted_terms = [weight * fw_df[col] for col, weight in target_weights.items()]
rating = weighted_terms[0]
for term in weighted_terms[1:]:
    rating = rating + term
fw_df['rating_target'] = rating

y = fw_df['rating_target']

# Hold out 20% of the forwards; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Standardise the features, then fit a 100-tree random forest regressor
# (seeded so repeated runs give the same model).
# NOTE(review): scaling is not expected to matter for a tree ensemble; the
# step is left in so the saved pipeline keeps the same structure.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestRegressor(n_estimators=100, random_state=42))
])

pipeline.fit(X_train, y_train)

# Score the held-out rows so the ratings below come with an out-of-sample
# error estimate: R^2 from the pipeline's own scorer plus mean absolute error.
test_pred = pipeline.predict(X_test)
test_r2 = pipeline.score(X_test, y_test)
test_mae = (y_test - test_pred).abs().mean()
print(f"Hold-out R^2: {test_r2:.3f} | MAE: {test_mae:.3f} (n={len(y_test)})")

# Predict for every forward. X contains the training rows as well, so most of
# these predictions are in-sample.
fw_df['predicted_rating'] = pipeline.predict(X)

In [5]:
# Top 10 forwards by predicted rating (highest first).
top_fw = fw_df[['Player', 'Squad', 'Gls', 'Ast', 'predicted_rating']].sort_values(by='predicted_rating', ascending=False)
print(top_fw.head(10))

# Persist the fitted pipeline. The target directory is created first so the
# dump does not fail when ../models does not exist yet. The original string
# path is passed to joblib so the returned file list is unchanged.
model_path = "../models/fw_rating_model.pkl"
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)

                  Player            Squad  Gls  Ast  predicted_rating
2243       Mohamed Salah        Liverpool   27   17           23.9664
1652       Kylian Mbappé      Real Madrid   22    3           19.6658
1449  Robert Lewandowski        Barcelona   25    2           19.2912
1287          Harry Kane    Bayern Munich   23    8           18.3440
1083      Erling Haaland  Manchester City   21    3           18.1209
1191      Alexander Isak    Newcastle Utd   20    5           16.8124
683      Ousmane Dembélé        Paris S-G   21    5           16.1905
2145       Mateo Retegui         Atalanta   22    4           15.7753
1654        Bryan Mbeumo        Brentford   16    5           15.0171
1299          Moise Kean       Fiorentina   17    3           14.5248


['../models/fw_rating_model.pkl']

In [6]:
# Same ranking as the previous cell, with the synthetic target shown next to
# the model's prediction for comparison.
comparison_cols = ['Player', 'Squad', 'Gls', 'Ast', 'predicted_rating', 'rating_target']
top_fw = fw_df[comparison_cols].sort_values(by='predicted_rating', ascending=False)
print(top_fw.head(10))

                  Player            Squad  Gls  Ast  predicted_rating  \
2243       Mohamed Salah        Liverpool   27   17           23.9664   
1652       Kylian Mbappé      Real Madrid   22    3           19.6658   
1449  Robert Lewandowski        Barcelona   25    2           19.2912   
1287          Harry Kane    Bayern Munich   23    8           18.3440   
1083      Erling Haaland  Manchester City   21    3           18.1209   
1191      Alexander Isak    Newcastle Utd   20    5           16.8124   
683      Ousmane Dembélé        Paris S-G   21    5           16.1905   
2145       Mateo Retegui         Atalanta   22    4           15.7753   
1654        Bryan Mbeumo        Brentford   16    5           15.0171   
1299          Moise Kean       Fiorentina   17    3           14.5248   

      rating_target  
2243          27.38  
1652          19.82  
1449          19.53  
1287          18.70  
1083          17.82  
1191          17.31  
683           19.57  
2145          16.54 