In [None]:
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import warnings
# Ignore every warning of category FutureWarning from this point on, so that
# deprecation notices do not clutter the cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Fixed seed so the train/test partition -- and every metric computed from it --
# is identical on each Restart & Run All.
SPLIT_SEED = 42

data = pd.read_csv('allTeams_data.csv')

# "R" is the regression target; it is removed from the feature matrix together
# with the columns listed below.
# NOTE(review): 'Unnamed: 0' is presumably an index column written out when the
# CSV was saved -- confirm against the code that produced the file.
X = data.drop(columns=['Unnamed: 0', "R", "D/N", "Streak", "Rank", "GB"])
Y = data["R"]

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=SPLIT_SEED
)

In [None]:
# Gradient-boosted regressor with hand-fixed hyperparameters.
# NOTE(review): given the name `bestModel`, these values presumably come from an
# earlier hyperparameter search -- confirm and link to where that search lives.
bestModel = XGBRegressor(
    colsample_bytree=1.0,
    gamma=1.5,
    learning_rate=0.06,
    max_depth=5,
    min_child_weight=100,
    n_estimators=140,
    n_jobs=4,  # documented scikit-learn-API name; replaces the legacy `nthread` alias
    subsample=0.95,
)

# Alternative estimator kept for reference; uncomment to swap it in.
# bestModel = RandomForestRegressor(n_estimators=155,
#                                   max_leaf_nodes=140,
#                                   min_samples_split=13)

# Last expression of the cell: fits on the training split and displays the fitted estimator.
bestModel.fit(X_train, y_train)

In [None]:
# Raw (unrounded) predictions for the held-out split; later cells read `results`.
results = bestModel.predict(X_test)

# Cross-validate on the training split only. The scorer returns *negated* MSE
# per fold, so flip the sign before taking the square root.
scoresXG = cross_val_score(
    bestModel,
    X_train,
    y_train,
    scoring='neg_mean_squared_error',
)
fold_rmse = (-scoresXG) ** 0.5

# Cell output: mean of the per-fold RMSE values.
fold_rmse.mean()

In [None]:
# Snap each prediction to the nearest whole number so it can be compared for
# exact equality with the values in y_test.
resultsRound = np.rint(results).astype(int)

# Element-wise flag: True where the rounded prediction equals the actual value.
correct = resultsRound == y_test

# Note: the first figure is a fraction in [0, 1], despite the "Percent" label.
exact_fraction = correct.sum() / len(y_test)
rounded_mse = mean_squared_error(y_test, resultsRound)

print("Percent Correct (rounded results): ", exact_fraction, sep="")
print("MSE: ", rounded_mse, sep="")

In [None]:
# For each tolerance t in 0.5, 1.0, ..., 5.0, report the fraction of test rows
# whose actual value lies in [prediction - t, prediction + t], using the raw
# (unrounded) predictions in `results`.
TOLERANCES = [half_steps / 2 for half_steps in range(1, 11)]

predicted = np.asarray(results, dtype=float)
actual = np.asarray(y_test, dtype=float)

# One vectorised, inclusive band check per tolerance replaces ten hand-copied
# counters and comparisons.
hit_counts = [
    int(((actual >= predicted - tol) & (actual <= predicted + tol)).sum())
    for tol in TOLERANCES
]

# Keep the original per-tolerance counter names bound, in case another cell reads them.
(correct5, correct10, correct15, correct20, correct25,
 correct30, correct35, correct40, correct45, correct50) = hit_counts

for tol, hits in zip(TOLERANCES, hit_counts):
    # "g" drops a trailing ".0" and lstrip("0") turns "0.5" into ".5", so the
    # printed labels are exactly ".5", "1", "1.5", ..., "5".
    label = format(tol, "g").lstrip("0")
    # The printed value is a fraction in [0, 1], despite the "Percent" label.
    print("Percent Correct within " + label + ": " + str(hits / len(y_test)))