In [92]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor
import sklearn
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot


In [56]:
# Actual Values from 2020 ESPN draft league
actual_data = pd.read_csv("./data/2020_espn_draft_results.csv", index_col=0)
accumulated_data = pd.read_csv("./data/fantasy_football_auction_values_with_sentiment.csv")
madden_data = pd.read_csv("./data/madden21_launch_ratings.csv")

In [57]:
actual_data.head()

Unnamed: 0,player_names,bid_amounts,keepers,round_num,round_pick
0,Chris Carson,15,True,1,1
1,Derrick Henry,27,True,1,2
2,Lamar Jackson,9,True,1,3
3,George Kittle,10,True,1,4
4,Terry McLaurin,5,True,1,5


In [58]:
accumulated_data.head()

Unnamed: 0,Player_Names,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,Average_Price,Proj_Price,Inflated_Price,sentiment_scores
0,Christian McCaffrey,63,70,64,76,70,63,69,81,81,0.108964
1,Saquon Barkley,60,61,62,71,67,56,63,72,72,0.090616
2,Ezekiel Elliott,60,58,55,64,52,45,56,67,67,0.135976
3,Michael Thomas,51,57,54,56,49,45,53,43,43,0.085932
4,Dalvin Cook,59,51,54,58,50,37,49,57,57,0.0


In [59]:
madden_data.head()

Unnamed: 0,firstName,lastName,Name,position,archetype,overall_rating,team,age,college,awareness_rating,...,press_rating,throwAccuracyDeep_rating,blockShedding_rating,runBlockFinesse_rating,teamId,agility_rating,Unnamed: 75,passBlockFinesse_rating,age.1,primaryKey
0,Aaron,Donald,Aaron Donald,RE,DE_PowerRusher,99,Rams,29,Pittsburgh,99,...,21,6,97,45,24,86,,45,29,10852
1,Christian,McCaffrey,Christian McCaffrey,HB,HB_ReceivingBack,99,Panthers,24,Stanford,97,...,15,30,32,22,21,97,,32,24,12556
2,Michael,Thomas,Michael Thomas,WR,WR_RouteRunner,99,Saints,27,Ohio State,99,...,16,6,22,31,27,92,,14,27,17552
3,Patrick,Mahomes,Patrick Mahomes,QB,QB_Improviser,99,Chiefs,24,Texas Tech,97,...,10,93,24,10,9,88,,10,24,12635
4,Stephon,Gilmore,Stephon Gilmore,CB,CB_MantoMan,99,Patriots,29,South Carolina,99,...,99,32,54,35,22,94,,35,29,11436


In [60]:
data = pd.merge(actual_data, accumulated_data, left_on="player_names", right_on="Player_Names").drop("Player_Names", axis=1)

In [61]:
data = pd.merge(data, madden_data, left_on="player_names", right_on="Name").drop(["firstName", "lastName", "Name"], axis=1)

In [62]:
# Disregard datapoints where they're kept as Keepers.
data = data[data['keepers'] != True].drop("keepers", axis=1)

In [63]:
# Drop irrelevant Madden columns
data = data.drop(["plyrBirthdate", "Unnamed: 65", "plyrPortrait", "teamId", "Unnamed: 75", "primaryKey", "age.1"], axis=1)

In [64]:
data.head()

Unnamed: 0,player_names,bid_amounts,round_num,round_pick,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,...,throwUnderPressure_rating,signingBonus,passBlock_rating,changeOfDirection_rating,press_rating,throwAccuracyDeep_rating,blockShedding_rating,runBlockFinesse_rating,agility_rating,passBlockFinesse_rating
19,Travis Kelce,20,2,8,35,37,43,40,36,29,...,33,11100000,64,75,10,49,40,66,87,62
20,Christian McCaffrey,75,2,9,63,70,64,76,70,63,...,17,32190000,69,96,15,30,32,22,97,32
21,Zach Ertz,11,2,10,19,16,22,13,26,16,...,15,23430000,63,72,10,6,44,56,82,58
22,Michael Thomas,55,2,11,51,57,54,56,49,45,...,22,35130000,40,90,16,6,22,31,92,14
23,Michael Thomas,55,2,11,51,57,54,56,49,45,...,15,140000,35,81,63,6,63,35,84,35


In [81]:
from sklearn import preprocessing


for col in data:
    if data[col].dtype != int and col != "player_names":
        # Turn each of these into categorical
        le = preprocessing.LabelEncoder()
        le.fit(data[col])
        print(f"Classes: {list(le.classes_)}")
        data[col] = le.transform(data[col]) 


In [83]:
data.columns

Index(['player_names', 'bid_amounts', 'round_num', 'round_pick', 'ESPN_Price',
       'ESPN_Avg_Price', 'Yahoo_Price', 'Yahoo_Avg_Price', 'NFLprice',
       'NFL_Avg_Price', 'Average_Price', 'Proj_Price', 'Inflated_Price',
       'sentiment_scores', 'position', 'archetype', 'overall_rating', 'team',
       'age', 'college', 'awareness_rating', 'throwPower_rating',
       'kickReturn_rating', 'leadBlock_rating', 'strength_rating',
       'bCVision_rating', 'catchInTraffic_rating', 'playAction_rating',
       'pursuit_rating', 'plyrAssetname', 'mediumRouteRunning_rating',
       'catching_rating', 'acceleration_rating', 'spinMove_rating', 'height',
       'finesseMoves_rating', 'spectacularCatch_rating', 'runBlock_rating',
       'tackle_rating', 'injury_rating', 'zoneCoverage_rating', 'weight',
       'runningStyle_rating', 'deepRouteRunning_rating', 'yearsPro',
       'totalSalary', 'trucking_rating', 'throwAccuracyShort_rating',
       'jukeMove_rating', 'playRecognition_rating', 'sho

In [97]:
data.head()

Unnamed: 0,player_names,bid_amounts,round_num,round_pick,ESPN_Price,ESPN_Avg_Price,Yahoo_Price,Yahoo_Avg_Price,NFLprice,NFL_Avg_Price,...,throwUnderPressure_rating,signingBonus,passBlock_rating,changeOfDirection_rating,press_rating,throwAccuracyDeep_rating,blockShedding_rating,runBlockFinesse_rating,agility_rating,passBlockFinesse_rating
19,Travis Kelce,20,2,8,35,37,43,40,36,29,...,33,11100000,64,75,10,49,40,66,87,62
20,Christian McCaffrey,75,2,9,63,70,64,76,70,63,...,17,32190000,69,96,15,30,32,22,97,32
21,Zach Ertz,11,2,10,19,16,22,13,26,16,...,15,23430000,63,72,10,6,44,56,82,58
22,Michael Thomas,55,2,11,51,57,54,56,49,45,...,22,35130000,40,90,16,6,22,31,92,14
23,Michael Thomas,55,2,11,51,57,54,56,49,45,...,15,140000,35,81,63,6,63,35,84,35


In [100]:
import matplotlib.pyplot as plt

In [103]:
def plot_feature_importance(clf, num_cols_to_plot, dataset):
    imp = pd.DataFrame(dataset.drop(['player_names', 'bid_amounts'], axis=1).columns)
    imp['feature_importance'] = clf.feature_importances_
    imp = imp.sort_values(by=['feature_importance'], ascending=False)
    imp.columns = ['name', 'feature_importance']
    plt.barh(imp['name'][:num_cols_to_plot], imp['feature_importance'][:num_cols_to_plot])

In [None]:
for idx, model in enumerate([DecisionTreeRegressor(), RandomForestRegressor(), XGBRegressor()]):
    # Decision Trees
    mse = []
    mae = []
    r2 = []
    for i in tqdm(range(100)):
        xtrain, xtest, ytrain, ytest = train_test_split(data.drop(["player_names", "bid_amounts"], axis=1), data["bid_amounts"], test_size=0.20)
        model.fit(xtrain, ytrain)
        predictions = model.predict(xtest)
        mse.append(mean_squared_error(predictions, ytest))
        mae.append(mean_absolute_error(predictions, ytest))
        r2.append(r2_score(predictions, ytest))
    print(str(model))
    print(f"MSE: {np.mean(mse)}")
    print(f"MAE: {np.mean(mae)}")
    print(f"R2: {np.mean(r2)}")
    if idx == 1: # Variable importance from RF model.
        plot_feature_importance(model, 15, data)

100%|██████████| 100/100 [00:00<00:00, 158.20it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

DecisionTreeRegressor()
MSE: 49.2409375
MAE: 4.1278125
R2: 0.8047534206537413


 51%|█████     | 51/100 [00:10<00:10,  4.61it/s]

In [96]:
importance

array([7.34823766e-03, 2.81425199e-04, 2.72305088e-01, 4.55860596e-01,
       3.34240935e-02, 7.63731660e-02, 5.30241701e-02, 6.28099952e-03,
       2.61731572e-02, 3.97090797e-03, 8.83835779e-03, 1.29697559e-04,
       4.36056491e-04, 5.78572658e-04, 3.97917151e-04, 7.09076873e-04,
       4.46020084e-04, 4.74579633e-04, 5.67359243e-04, 4.05976141e-04,
       3.47322366e-03, 7.27604989e-04, 1.13521057e-03, 3.43757216e-04,
       5.77960133e-04, 7.45057989e-04, 2.72782519e-03, 1.20904397e-03,
       4.21742558e-04, 1.12188950e-04, 5.04632132e-04, 2.49213290e-04,
       2.96470756e-03, 2.81283603e-04, 3.61329311e-04, 1.32736005e-03,
       4.48996251e-04, 8.32921102e-04, 9.80421306e-04, 2.00318536e-03,
       1.39049489e-03, 1.82019691e-03, 1.33279273e-04, 5.26967529e-04,
       1.41314593e-03, 7.00612464e-05, 7.48650309e-04, 9.18370217e-05,
       3.75139520e-04, 6.37317726e-04, 7.98122559e-04, 9.31039183e-04,
       1.86307620e-04, 1.30942904e-03, 3.00892539e-04, 6.04922413e-04,
      