In [21]:
from time import perf_counter, time

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

In [22]:
# Load the processed dataset from its path relative to this notebook.
df = pd.read_csv(filepath_or_buffer='../data/processed/crossfit_db.csv')

In [23]:
# Discard every row that has a missing value in any column.
# `df` is rebound in place because the following cells read it under this name.
df = df.dropna(axis="index", how="any")

In [35]:
# Build the feature matrix / target vector and hold out 30% of the rows.
#
# `drop_features` lists every column that must NOT be used as a predictor.
# It includes the target column itself ('overall_score_2023'), so the target
# never appears in X.
drop_features = [
    'year', 'firstName', 'lastName', 'status', 'gender',
    'rank_2023_1', 'rank_2023_2', 'rank_2023_3', 'rank_2023_4',
    'countryOfOriginCode', 'regionId', 'affiliateId', 'affiliateName',
    'competitorName', 'countryOfOriginName', 'regionName',
    'overallRank', 'overall_score_2023',
    'score_reps_2023_1', 'score_reps_2023_2', 'score_reps_2023_3',
    'score_reps_2023_4', 'score_time_2023_4',
]

# Target first, then the predictors (everything not listed above).
y = df["overall_score_2023"]
X = df.drop(drop_features, axis=1)

# Fixed random_state keeps the 70/30 partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [36]:
# Candidate models to benchmark, all with library-default hyperparameters.
#
# The tree-based estimators accept a `random_state`; it is set explicitly
# (to 42, the same value used for the train/test split) so that repeated runs
# of the notebook produce comparable scores instead of drifting with the
# estimator's internal randomness.
regressors = [
    KNeighborsRegressor(),
    RandomForestRegressor(random_state=42),
    DecisionTreeRegressor(random_state=42),
    LinearRegression(),
    Lasso(),
    Ridge(),
    XGBRegressor(random_state=42),
]

In [37]:
# Fit each candidate model on the training split, predict on the test split,
# and print timing plus three regression metrics for comparison.
#
# `head` caps how many entries of `regressors` are benchmarked. Its current
# value exceeds the length of the list, so the slice keeps every model; lower
# it to run only the first few.
head = 1600
for model in regressors[:head]:
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; the wall clock (time()) can be too coarse
    # for the millisecond-scale intervals measured here.
    start = perf_counter()
    model.fit(X_train, y_train)
    train_time = perf_counter() - start

    start = perf_counter()
    y_pred = model.predict(X_test)
    predict_time = perf_counter() - start

    print(model.__class__.__name__)
    print("\tTraining time: %0.3fs" % train_time)
    print("\tPrediction time: %0.3fs" % predict_time)
    print("\tExplained variance:", explained_variance_score(y_test, y_pred))
    print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
    print("\tR2 score:", r2_score(y_test, y_pred))
    print()

KNeighborsRegressor
	Training time: 0.004s
	Prediction time: 0.081s
	Explained variance: 0.7713839849391946
	Mean absolute error: 38108.227179821704
	R2 score: 0.7711262764789721

RandomForestRegressor
	Training time: 8.901s
	Prediction time: 0.080s
	Explained variance: 0.820151060585026
	Mean absolute error: 32904.40984561861
	R2 score: 0.8196657132267428

DecisionTreeRegressor
	Training time: 0.230s
	Prediction time: 0.002s
	Explained variance: 0.6269390709082872
	Mean absolute error: 48207.061317677755
	R2 score: 0.6256775639958604

LinearRegression
	Training time: 0.009s
	Prediction time: 0.009s
	Explained variance: 0.8166198023030444
	Mean absolute error: 33142.08888462876
	R2 score: 0.8165476602624522

Lasso
	Training time: 0.122s
	Prediction time: 0.001s
	Explained variance: 0.816619894717918
	Mean absolute error: 33142.07749914892
	R2 score: 0.8165477534125757

Ridge
	Training time: 0.012s
	Prediction time: 0.001s
	Explained variance: 0.8166198025950702
	Mean absolute error: 33