In [86]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [87]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [88]:
df=pd.read_csv("/content/drive/MyDrive/Project Soccer/GOALKEEPERS.csv")

In [89]:
X = df.drop("CA",axis=1)   
y = df["CA"]

In [90]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42)

Gradient Boosting

In [91]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [92]:
gbr = GradientBoostingRegressor()

In [93]:
param_grid = {'n_estimators': [100, 200, 300],
              'learning_rate': [0.1, 0.05, 0.01],
              'max_depth': [1, 3, 5],
              'min_samples_split': [2, 4, 6],
              'max_features': [None, 'sqrt', 'log2']}

In [94]:
grid = GridSearchCV(gbr, param_grid, cv=5)
grid.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=GradientBoostingRegressor(),
             param_grid={'learning_rate': [0.1, 0.05, 0.01],
                         'max_depth': [1, 3, 5],
                         'max_features': [None, 'sqrt', 'log2'],
                         'min_samples_split': [2, 4, 6],
                         'n_estimators': [100, 200, 300]})

In [95]:
best_params = grid.best_params_
print("Best parameters: ", best_params)

Best parameters:  {'learning_rate': 0.1, 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_split': 2, 'n_estimators': 100}


In [119]:
gbr = GradientBoostingRegressor(**best_params)#

gbr.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=1, max_features='sqrt')

In [120]:
y_pred = gbr.predict(X_test)

In [123]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error: ", rmse)

Root Mean Squared Error:  9.870138521859358


In [124]:
r2 = r2_score(y_test, y_pred)
print("R^2 Score: ", r2)

R^2 Score:  0.7171301494264744


In [125]:
n = y_test.shape[0]

# Calculate the number of features
k = X_test.shape[1]

# Calculate adjusted R^2 score
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)
print("Adjusted R^2 Score: ", adj_r2)

Adjusted R^2 Score:  0.4499752905514779


In [101]:
from sklearn.metrics import mean_absolute_error

...

mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error: ", mae)

Mean Absolute Error:  7.4213345807748885


In [126]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error,: ", mse,)

Mean Squared Error,:  97.41963444069205


In [103]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [104]:
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1, 0.15],
    'n_estimators': [50, 100, 150],
    'gamma': [0, 0.1, 0.2],
    'reg_lambda': [1, 5, 10]}

In [105]:
xgb = XGBRegressor()

In [106]:
grid = GridSearchCV(xgb, param_grid, cv=5, scoring='neg_mean_absolute_error')

In [107]:
grid.fit(X_train, y_train)



GridSearchCV(cv=5, estimator=XGBRegressor(),
             param_grid={'gamma': [0, 0.1, 0.2],
                         'learning_rate': [0.05, 0.1, 0.15],
                         'max_depth': [3, 5, 7], 'n_estimators': [50, 100, 150],
                         'reg_lambda': [1, 5, 10]},
             scoring='neg_mean_absolute_error')

In [108]:
print('Best parameters found by GridSearchCV:', grid.best_params_)

Best parameters found by GridSearchCV: {'gamma': 0, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100, 'reg_lambda': 10}


In [109]:
xgb_best = XGBRegressor(
                       ax_depth=grid.best_params_['max_depth'], 
                        learning_rate=grid.best_params_['learning_rate'], 
                        n_estimators=grid.best_params_['n_estimators'], 
                        gamma=grid.best_params_['gamma'],
                        reg_lambda=grid.best_params_['reg_lambda'])

In [110]:
xgb_best.fit(X_train, y_train)



XGBRegressor(ax_depth=5, reg_lambda=10)

In [111]:
y_pred = xgb_best.predict(X_test)

In [112]:
print('Mean Absolute Error:', mae)
print('R2 Score:', r2)
print('Adjusted R2 Score:', adj_r2)
print('RMSE:', rmse)

Mean Absolute Error: 7.4213345807748885
R2 Score: 0.7450013163831306
Adjusted R2 Score: 0.5041692263005317
RMSE: 9.371279403561811


In [113]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error,: ", mse,)

Mean Squared Error,:  110.20023367616847


In [127]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142 entries, 0 to 141
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Apps        142 non-null    float64
 1   Height      142 non-null    float64
 2   Weight      142 non-null    float64
 3   Age         142 non-null    int64  
 4   Av Rat      142 non-null    float64
 5   K Ps/90     142 non-null    float64
 6   Pas %       142 non-null    float64
 7   Distance    142 non-null    float64
 8   Int/90      142 non-null    float64
 9   xSv %       142 non-null    float64
 10  Sv %        142 non-null    float64
 11  Svh         142 non-null    float64
 12  Svt         142 non-null    float64
 13  PoM         142 non-null    float64
 14  CA          142 non-null    int64  
 15  Saves       142 non-null    float64
 16  Saves/xSv%  142 non-null    float64
 17  Value       142 non-null    float64
dtypes: float64(16), int64(2)
memory usage: 20.1 KB
