### Simple Potential Ability Predictor
Uses linear regression to predict players' potential ability (PA) from their age and attribute stats.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Load the training export and trim it down to the columns/rows used for modelling.
data = pd.read_csv("football_manager_data_no_gk.csv")
data = (
    data
    .drop(columns=data.columns[0])  # leading column (the "inf" column per the original note)
    .drop(columns=['CA'])           # 'CA' is removed before modelling
    .iloc[:-2]                      # last two rows are NA per the original note
)
print(data.columns)

Index(['Name', 'Position', 'Age', 'Cor', 'Cro', 'Dri', 'Fin', 'Fir', 'Fre',
       'Hea', 'Lon', 'L Th', 'Mar', 'Pas', 'Pen', 'Tck', 'Tec', 'Agg', 'Ant',
       'Bra', 'Cmp', 'Cnt', 'Dec', 'Det', 'Fla', 'Ldr', 'OtB', 'Pos', 'Tea',
       'Vis', 'Wor', 'Acc', 'Agi', 'Bal', 'Jum', 'Nat', 'Pac', 'Sta', 'Str',
       'PA'],
      dtype='object')


In [3]:
# Disabled experiment: one-hot encode 'Position' and append the indicator columns.
#encoder = OneHotEncoder(sparse = False)
#position_encoded = encoder.fit_transform(data[['Position']])
#position_df = pd.DataFrame(position_encoded, columns=encoder.get_feature_names_out(['Position']))
#data = pd.concat([data, position_df], axis=1)

# 'Position' and 'Name' are removed here, before the feature matrix is built.
data = data.drop(columns=['Position', 'Name'])

# Columns the (currently disabled) StandardScaler step below would rescale.
attributes = [
    'Age', 'Cor', 'Cro', 'Dri', 'Fin', 'Fir', 'Fre', 'Hea', 'Lon', 'L Th',
    'Mar', 'Pas', 'Pen', 'Tck', 'Tec', 'Agg', 'Ant', 'Bra', 'Cmp', 'Cnt',
    'Dec', 'Det', 'Fla', 'Ldr', 'OtB', 'Pos', 'Tea', 'Vis', 'Wor', 'Acc',
    'Agi', 'Bal', 'Jum', 'Nat', 'Pac', 'Sta', 'Str',
]
#scaler = StandardScaler()
#data[attributes] = scaler.fit_transform(data[attributes])

In [4]:
# Target is the 'PA' column; every remaining column is a predictor.
y = data['PA']
X = data.drop(columns=['PA'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares model on the training portion
# (fit() returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows, scored in the next cell.
y_pred = model.predict(X_test)


In [5]:
# Score the held-out predictions with two error metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# One print call, one metric per line.
print(
    f"Mean Absolute Error: {mae}",
    f"Mean Squared Error: {mse}",
    sep="\n",
)

Mean Absolute Error: 10.277056563725349
Mean Squared Error: 175.76607364356022


In [13]:
# Load the squad to score and discard its last two rows.
# new_data1 keeps every column (including 'Name') so predictions can be labelled later.
new_data1 = pd.read_csv("las_palmas_players.csv")
new_data1 = new_data1.drop(index=new_data1.tail(2).index)

# new_data is the model input: same rows, minus the leading column
# (the "inf" column per the original note) and the 'Position'/'Name' columns.
new_data = (
    new_data1
    .drop(columns=new_data1.columns[0])
    .drop(columns=['Position', 'Name'])
)
print(new_data.columns)


Index(['Age', 'Cor', 'Cro', 'Dri', 'Fin', 'Fir', 'Fre', 'Hea', 'Lon', 'L Th',
       'Mar', 'Pas', 'Pen', 'Tck', 'Tec', 'Agg', 'Ant', 'Bra', 'Cmp', 'Cnt',
       'Dec', 'Det', 'Fla', 'Ldr', 'OtB', 'Pos', 'Tea', 'Vis', 'Wor', 'Acc',
       'Agi', 'Bal', 'Jum', 'Nat', 'Pac', 'Sta', 'Str'],
      dtype='object')


In [14]:
# Predict PA for every row of the new squad using the fitted model.
new_predictions = model.predict(new_data)

# Names come from the untrimmed copy, which has the same rows as new_data.
names = new_data1["Name"]

# Table of name -> predicted PA (kept for later use/export).
out_df = pd.DataFrame({'Name': names, 'PA': new_predictions})

# Pair names and predictions by position. Indexing with names[i] is a label
# lookup on the Series index, which only works while that index is exactly
# 0..n-1; zip() does not depend on the index at all.
for name, prediction in zip(names, new_predictions):
    print(f"{name} predicted potential: {prediction:.2f}")

Owen Alonso predicted potential: 57.24
Carlos Alba predicted potential: 46.10
Christophe Lamy predicted potential: 111.36
Garoé predicted potential: 95.09
Dirk Stolz predicted potential: 103.75
Yared predicted potential: 112.95
Luís Veiga predicted potential: 137.15
Richard Bennett-Bostock predicted potential: 85.60
Jesús Cruz predicted potential: 102.73
Sergio Mariño predicted potential: 78.50
Edey Tavío predicted potential: 62.62
Ayoze predicted potential: 102.64
Alejandro Aguilar predicted potential: 130.96
Pablo Álvarez predicted potential: 104.92
Nelson predicted potential: 122.75
Derek Llorca predicted potential: 129.05
Jelle van den Berg predicted potential: 101.52
José Olivera predicted potential: 105.73
Estanis predicted potential: 125.97
Zeben Tabares predicted potential: 123.20
Pablo Goikoetxea predicted potential: 118.90
Enrique Dario predicted potential: 88.71
Víctor Manuel Asensio predicted potential: 120.85
Cristo Déniz predicted potential: 90.25
Bentagay Bethencourt pre

In [28]:
# Determine the weighting of each feature used in the linear regression.
# Use the columns of X (the frame the model was trained on), not data.columns:
# `data` still contains the target column 'PA', so it has one more entry than
# model.coef_. zip() silently truncates, and the labels only lined up before
# because 'PA' happens to be the last column.
feature_names = X.columns
coefficients = model.coef_
feature_coefs = dict(zip(feature_names, coefficients))
for feature, coef in feature_coefs.items():
    print(f"{feature}: {coef:.4f}")

Age: -0.6388
Cor: 0.1351
Cro: -0.1339
Dri: 0.3830
Fin: 0.0507
Fir: 0.7333
Fre: 0.2263
Hea: 0.2613
Lon: -0.0188
L Th: 0.0706
Mar: -0.5050
Pas: 1.2395
Pen: 0.1572
Tck: -0.2689
Tec: 0.2245
Agg: -0.0423
Ant: 1.0253
Bra: 1.2387
Cmp: 0.5715
Cnt: 0.8342
Dec: 1.3581
Det: 0.2252
Fla: 0.5153
Ldr: 0.1474
OtB: -0.2183
Pos: 1.2812
Tea: -0.1028
Vis: 0.5604
Wor: -0.2926
Acc: 1.3244
Agi: 1.4320
Bal: 0.2735
Jum: 0.3859
Nat: 0.2256
Pac: 0.6182
Sta: -0.1256
Str: 0.8829
