In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [16]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.ensemble import StackingRegressor, AdaBoostRegressor, VotingRegressor, BaggingRegressor 
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [10]:
## loading the data
X = pd.read_csv('clean_selected_features.csv')
y = pd.read_csv('target.csv')
X.head()

Unnamed: 0.1,Unnamed: 0,value_eur,age,potential,wage_eur,movement_reactions,defending,mentality_composure,skill_ball_control
0,0,100500000.0,27,95,550000.0,94,27.0,60.0,96
1,1,79000000.0,29,92,375000.0,90,32.0,60.0,92
2,2,54500000.0,30,90,275000.0,89,32.0,60.0,90
3,3,52500000.0,32,90,275000.0,85,34.0,60.0,90
4,5,36000000.0,30,89,250000.0,90,59.0,60.0,94


In [11]:
X.drop(columns='Unnamed: 0', inplace=True)

In [12]:
y.head()

Unnamed: 0.1,Unnamed: 0,overall
0,0,93
1,1,92
2,2,90
3,3,90
4,5,89


In [13]:
y.drop(columns='Unnamed: 0', inplace=True)

In [15]:
## scale the features
scaler = MinMaxScaler()

X_norm = scaler.fit_transform(X)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=42)

# Models

## Linear Models

In [22]:
regressionModel = LinearRegression()

regressionModel.fit(X_train, y_train)

predicted_ratings = regressionModel.predict(X_test)

mse = mean_squared_error(y_test, predicted_ratings)
r_squared = r2_score(y_test, predicted_ratings)
print(f"Mean Squared Error: {mse}")
print("R-squared:", r_squared)

Mean Squared Error: 4.03083054241611
R-squared: 0.9170477863519058


In [25]:
ridge_model = Ridge(alpha=1.0)
lasso_model = Lasso(alpha=0.1)
elasticnet_model = ElasticNet(alpha=0.1, l1_ratio=0.5)

# Train Ridge model
ridge_model.fit(X_train, y_train)
ridge_predictions = ridge_model.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_predictions)
ridge_r_squared = r2_score(y_test, ridge_predictions)
print("Ridge Regression")
print(f"Mean Squared Error: {ridge_mse}")
print(f"R-squared: {ridge_r_squared}\n")

# Train Lasso model
lasso_model.fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)
lasso_mse = mean_squared_error(y_test, lasso_predictions)
lasso_r_squared = r2_score(y_test, lasso_predictions)
print("Lasso Regression")
print(f"Mean Squared Error: {lasso_mse}")
print(f"R-squared: {lasso_r_squared}\n")

# Train ElasticNet model
elasticnet_model.fit(X_train, y_train)
elasticnet_predictions = elasticnet_model.predict(X_test)
elasticnet_mse = mean_squared_error(y_test, elasticnet_predictions)
elasticnet_r_squared = r2_score(y_test, elasticnet_predictions)
print("ElasticNet Regression")
print(f"Mean Squared Error: {elasticnet_mse}")
print(f"R-squared: {elasticnet_r_squared}")

Ridge Regression
Mean Squared Error: 4.0307415587701465
R-squared: 0.9170496175850309

Lasso Regression
Mean Squared Error: 5.47651973912325
R-squared: 0.887296320034468

ElasticNet Regression
Mean Squared Error: 17.893473767338985
R-squared: 0.6317624263199191


# ENSEMBLE MODEL

In [23]:
RFmodel = RandomForestRegressor(n_estimators=100, random_state=42,)

RFmodel.fit(X_train, y_train)


predicted_ratings = RFmodel.predict(X_test)

mse = mean_squared_error(y_test, predicted_ratings)
r_squared = r2_score(y_test, predicted_ratings)
print(f"Mean Squared Error: {mse}")
print("R-squared:", r_squared)

Mean Squared Error: 0.5922635575138598
R-squared: 0.9878115508350244


In [24]:
GBmodel = GradientBoostingRegressor(n_estimators=100, random_state=42)
GBmodel.fit(X_train, y_train)

predicted_ratings = GBmodel.predict(X_test)

mse = mean_squared_error(y_test, predicted_ratings)
r_squared = r2_score(y_test, predicted_ratings)
print(f"Mean Squared Error: {mse}")
print("R-squared:", r_squared)

Mean Squared Error: 1.1844677349213315
R-squared: 0.9756243236790664


In [26]:
adaboost_model = AdaBoostRegressor(n_estimators=100, random_state=42)


adaboost_model.fit(X_train, y_train)


adaboost_predictions = adaboost_model.predict(X_test)


mse = mean_squared_error(y_test, adaboost_predictions)
r_squared = r2_score(y_test, adaboost_predictions)

mse = mean_squared_error(y_test, predicted_ratings)
r_squared = r2_score(y_test, predicted_ratings)
print(f"Mean Squared Error: {mse}")
print("R-squared:", r_squared)

Mean Squared Error: 1.1844677349213315
R-squared: 0.9756243236790664


In [28]:

svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

svr_model.fit(X_train, y_train)


svr_predictions = svr_model.predict(X_test)


mse = mean_squared_error(y_test, svr_predictions)
r_squared = r2_score(y_test, svr_predictions)


mse = mean_squared_error(y_test, predicted_ratings)
r_squared = r2_score(y_test, predicted_ratings)
print(f"Mean Squared Error: {mse}")
print("R-squared:", r_squared)

Mean Squared Error: 1.1844677349213315
R-squared: 0.9756243236790664


In [29]:
import pickle

In [30]:
with open('random_forest_model.pkl', 'wb') as model_file:
    pickle.dump(RFmodel, model_file)

In [31]:
# Save the fitted MinMaxScaler to a file using pickle
with open('minmax_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)