In [21]:
from time import perf_counter, time

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

In [22]:
# Read the processed CSV into a DataFrame; the path is relative to the
# kernel's working directory.
df = pd.read_csv(filepath_or_buffer='../data/processed/crossfit_db.csv')

In [23]:
# Discard every row that has a missing value in at least one column.
df = df.dropna(axis="index", how="any")

In [24]:
# Build the feature matrix and target, then hold out 30% of the rows for testing.

# Columns removed from the feature matrix. The target column
# ('overall_score_2023') is part of this list, so it never appears in X.
drop_features = [
    'year',
    'firstName',
    'lastName',
    'status',
    'gender',
    'age',
    'rank_2023_1',
    'rank_2023_2',
    'rank_2023_3',
    'rank_2023_4',
    'countryOfOriginCode',
    'regionId',
    'affiliateId',
    'affiliateName',
    'competitorName',
    'countryOfOriginName',
    'regionName',
    'overallRank',
    'overall_score_2023',
    'score_reps_2023_1',
    'score_reps_2023_2',
    'score_reps_2023_3',
    'score_reps_2023_4',
    'score_time_2023_4',
]

# Features: every column that is not listed above.
X = df.drop(labels=drop_features, axis="columns")
# Target: the overall 2023 score column.
y = df.loc[:, "overall_score_2023"]

# Fixed seed so the 70/30 partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [25]:
# Candidate models to benchmark, all with default hyperparameters.
# Estimators that accept a `random_state` and use randomness while fitting are
# seeded with the same value as train_test_split (42), so the metrics are
# reproducible after Restart Kernel -> Run All.
regressors = [
    KNeighborsRegressor(),
    RandomForestRegressor(random_state=42),  # bootstrap sampling is random
    DecisionTreeRegressor(random_state=42),  # features are permuted at each split
    LinearRegression(),
    Lasso(),
    Ridge(),
    XGBRegressor(random_state=42),  # seeded for consistency with the others
]

In [34]:
# Benchmark each regressor: time fit and predict separately, then print
# hold-out metrics computed on the test split.
#
# `head` caps how many entries of `regressors` are evaluated. 1600 is larger
# than the list as defined in this notebook, so the slice selects every model;
# lower it to benchmark only the first few.
head = 1600
for model in regressors[:head]:
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time() can be coarse and can jump
    # when the system clock is adjusted.
    start = perf_counter()
    model.fit(X_train, y_train)
    train_time = perf_counter() - start

    start = perf_counter()
    y_pred = model.predict(X_test)
    predict_time = perf_counter() - start

    print(model.__class__.__name__)
    print("\tTraining time: %0.3fs" % train_time)
    print("\tPrediction time: %0.3fs" % predict_time)
    print("\tExplained variance:", explained_variance_score(y_test, y_pred))
    print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
    print("\tR2 score:", r2_score(y_test, y_pred))
    print()

KNeighborsRegressor
	Training time: 0.006s
	Prediction time: 0.062s
	Explained variance: 0.7713839849391946
	Mean absolute error: 38108.227179821704
	R2 score: 0.7711262764789721

RandomForestRegressor
	Training time: 8.352s
	Prediction time: 0.075s
	Explained variance: 0.8172584952861128
	Mean absolute error: 33268.89480321809
	R2 score: 0.8166853306362989

DecisionTreeRegressor
	Training time: 0.135s
	Prediction time: 0.001s
	Explained variance: 0.6301338764673532
	Mean absolute error: 47702.24266144814
	R2 score: 0.6292828277845443

LinearRegression
	Training time: 0.003s
	Prediction time: 0.000s
	Explained variance: 0.8089697301067423
	Mean absolute error: 33971.9467152358
	R2 score: 0.8089162698819776

Lasso
	Training time: 0.070s
	Prediction time: 0.002s
	Explained variance: 0.8089698365353585
	Mean absolute error: 33971.933344279016
	R2 score: 0.8089163767603156

Ridge
	Training time: 0.008s
	Prediction time: 0.001s
	Explained variance: 0.808969732195349
	Mean absolute error: 33