In [90]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor
import sklearn
import numpy as np
from tqdm import tqdm

In [102]:
# Actual Values from 2020 ESPN draft league
actual_data = pd.read_csv("./data/2020_espn_draft_results.csv", index_col=0)
accumulated_data = pd.read_csv("./data/fantasy_football_auction_values_with_sentiment.csv",index_col=0)

In [103]:
actual_data.head()

Unnamed: 0,player_names,bid_amounts,keepers,round_num,round_pick
0,Chris Carson,15,True,1,1
1,Derrick Henry,27,True,1,2
2,Lamar Jackson,9,True,1,3
3,George Kittle,10,True,1,4
4,Terry McLaurin,5,True,1,5


In [104]:
accumulated_data.head()

Unnamed: 0,Player_Names,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,Average_Price,Proj_Price,Inflated_Price,sentiment_scores
0,Christian McCaffrey,63,70,64,76,70,63,69,81,81,0.108964
1,Saquon Barkley,60,61,62,71,67,56,63,72,72,0.090616
2,Ezekiel Elliott,60,58,55,64,52,45,56,67,67,0.135976
3,Michael Thomas,51,57,54,56,49,45,53,43,43,0.085932
4,Dalvin Cook,59,51,54,58,50,37,49,57,57,0.0


In [None]:
data = pd

In [105]:
data = pd.merge(actual_data, accumulated_data, left_on="player_names", right_on="Player_Names").drop("Player_Names", axis=1)

In [106]:
data.shape

(198, 15)

In [107]:
# Disregard datapoints where they're kept as Keepers.
data = data[data['keepers'] != True].drop("keepers", axis=1)

In [108]:
data.shape

(179, 14)

In [109]:
data.head()

Unnamed: 0,player_names,bid_amounts,round_num,round_pick,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,Average_Price,Proj_Price,Inflated_Price,sentiment_scores
19,Travis Kelce,20,2,8,35,37,43,40,36,29,35,37,37,0.169759
20,Christian McCaffrey,75,2,9,63,70,64,76,70,63,69,81,81,0.108964
21,Zach Ertz,11,2,10,19,16,22,13,26,16,15,19,19,0.096479
22,Michael Thomas,55,2,11,51,57,54,56,49,45,53,43,43,0.085932
23,Aaron Jones,34,2,12,43,41,36,40,30,29,37,45,45,0.125548


In [115]:
for model in [DecisionTreeRegressor(), RandomForestRegressor(), XGBRegressor()]:
    # Decision Trees
    mse = []
    mae = []
    r2 = []
    for i in tqdm(range(10)):
        xtrain, xtest, ytrain, ytest = train_test_split(data.drop(["player_names", "bid_amounts"], axis=1), data["bid_amounts"], test_size=0.20)
        model.fit(xtrain, ytrain)
        predictions = model.predict(xtest)
        mse.append(mean_squared_error(predictions, ytest))
        mae.append(mean_absolute_error(predictions, ytest))
        r2.append(r2_score(predictions, ytest))
    print(str(model))
    print(f"MSE: {np.mean(mse)}")
    print(f"MAE: {np.mean(mae)}")
    print(f"R2: {np.mean(r2)}")

100%|██████████| 10/10 [00:00<00:00, 66.73it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

DecisionTreeRegressor()
MSE: 36.29722222222222
MAE: 3.2638888888888893
R2: 0.8580766886315482


100%|██████████| 10/10 [00:03<00:00,  3.13it/s]
 20%|██        | 2/10 [00:00<00:00, 16.81it/s]

RandomForestRegressor()
MSE: 18.568716111111108
MAE: 2.7160555555555557
R2: 0.9016250833615699


100%|██████████| 10/10 [00:00<00:00, 15.85it/s]

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)
MSE: 19.635851334750715
MAE: 2.573988568037749
R2: 0.9259683397149493





In [110]:
# Decision Trees
mse = []
mae = []
r2 = []
for i in tqdm(range(10)):
    xtrain, xtest, ytrain, ytest = train_test_split(data.drop(["player_names", "bid_amounts"], axis=1), data["bid_amounts"], test_size=0.20)
    model = DecisionTreeRegressor()
    model.fit(xtrain, ytrain)
    predictions = model.predict(xtest)
    mse.append(mean_squared_error(predictions, ytest))
    mae.append(mean_absolute_error(predictions, ytest))
    r2.append(r2_score(predictions, ytest))
print(f"MSE: {np.mean(mse)}")
print(f"MAE: {np.mean(mae)}")
print(f"R2: {np.mean(r2)}")

100%|██████████| 10/10 [00:00<00:00, 71.53it/s]

MSE: 29.647222222222222
MAE: 3.1194444444444445
R2: 0.8791800496246239





In [111]:
# Random Forests
mse = []
mae = []
r2 = []
for i in tqdm(range(10)):
    xtrain, xtest, ytrain, ytest = train_test_split(data.drop(["player_names", "bid_amounts"], axis=1), data["bid_amounts"], test_size=0.20)
    model = RandomForestRegressor()
    model.fit(xtrain, ytrain)
    predictions = model.predict(xtest)
    mse.append(mean_squared_error(predictions, ytest))
    mae.append(mean_absolute_error(predictions, ytest))
    r2.append(r2_score(predictions, ytest))
print(f"MSE: {np.mean(mse)}")
print(f"MAE: {np.mean(mae)}")
print(f"R2: {np.mean(r2)}")

100%|██████████| 10/10 [00:03<00:00,  2.99it/s]

MSE: 23.061628888888894
MAE: 2.846555555555556
R2: 0.8974978768375854





In [112]:
# XGBoost
mse = []
mae = []
r2 = []
for i in tqdm(range(10)):
    xtrain, xtest, ytrain, ytest = train_test_split(data.drop(["player_names", "bid_amounts"], axis=1), data["bid_amounts"], test_size=0.20)
    model = XGBRegressor()
    model.fit(xtrain, ytrain)
    predictions = model.predict(xtest)
    mse.append(mean_squared_error(predictions, ytest))
    mae.append(mean_absolute_error(predictions, ytest))
    r2.append(r2_score(predictions, ytest))
print(f"MSE: {np.mean(mse)}")
print(f"MAE: {np.mean(mae)}")
print(f"R2: {np.mean(r2)}")

100%|██████████| 10/10 [00:00<00:00, 13.04it/s]

MSE: 16.622556939223134
MAE: 2.4241627643505734
R2: 0.9218142073120232





In [89]:
data.head()

Unnamed: 0.1,player_names,bid_amounts,round_num,round_pick,Unnamed: 0,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,Average_Price,Proj_Price,Inflated_Price,sentiment_scores
19,Travis Kelce,20,2,8,21,35,37,43,40,36,29,35,37,37,0.169759
20,Christian McCaffrey,75,2,9,0,63,70,64,76,70,63,69,81,81,0.108964
21,Zach Ertz,11,2,10,56,19,16,22,13,26,16,15,19,19,0.096479
22,Michael Thomas,55,2,11,3,51,57,54,56,49,45,53,43,43,0.085932
23,Aaron Jones,34,2,12,11,43,41,36,40,30,29,37,45,45,0.125548
