In [97]:
import pandas as pd
from sklearn.linear_model import Ridge #Import Ridge and RFR for machine learning and testing.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [98]:
advanced_league_results = pd.read_csv("advanced_league_results.csv") #Read the csv file

In [99]:
stats_pred = ['W', 'L', 'MOV', 'ORtg', 'DRtg', 'NRtg', 'FTr', '3PAr', 'TS%', 'eFG%', 'TOV%', 'ORB%', 'DRB%', 'FT/FGA', 'Rounds Played'] #Organize the stats you need to predict the champion per year

In [100]:
train = advanced_league_results[advanced_league_results["Year"]< 2019] #Train all years before 2019

In [101]:
test = advanced_league_results[advanced_league_results["Year"] == 2019] #Test on the result of 2019

In [102]:
ridge_regression = Ridge(alpha = .1) #Uses a ridge alpha of .1

In [103]:
ridge_regression.fit(train[stats_pred], train["Won Championship"]) #Use the stats to train who would win each championship

In [104]:
prediction_value = ridge_regression.predict(test[stats_pred]) # Use ridge regression in order to set the prediction_value

In [105]:
prediction_value = pd.DataFrame(test, columns = ["Prediction Value"], index=test.index) #Use pandas to convert it into a DF, and test on the index

In [106]:
adv_stats_merger = pd.concat([test[["Team", "Won Championship"]], prediction_value], axis = 1) #Concatenate team and won championship with the prediction value

In [107]:
adv_stats_merger = adv_stats_merger.sort_values("Prediction Value", ascending = False) #Sort by who is the most likely to win a championship based on the ridge regression
adv_stats_merger["Likely Champion Rank"] = list(range(1,adv_stats_merger.shape[0]+1)) #Add a column for likely champion rank to show the predicted champion
adv_stats_merger #show the result, prediction value indicates how far off a prediction was, which will be formulated in the next code

Unnamed: 0,Team,Won Championship,Prediction Value,Likely Champion Rank
735,Milwaukee Bucks,0,,1
736,Golden State Warriors,0,,2
737,Toronto Raptors,1,,3
738,Utah Jazz,0,,4
739,Houston Rockets,0,,5
740,Boston Celtics,0,,6
741,Portland Trail Blazers,0,,7
742,Denver Nuggets,0,,8
743,Indiana Pacers,0,,9
744,Oklahoma City Thunder,0,,10


In [108]:
def sort_avg_precision(adv_stats_merger):
    actual_value = adv_stats_merger.sort_values("Won Championship", ascending = False).head(1)
    prediction_value = adv_stats_merger.sort_values("Prediction Value", ascending = False).head(16)
    adv_stats_merger["Prediction Value"] = actual_value["Won Championship"]/prediction_value["Likely Champion Rank"]
    avg_prec_storage = []
    avg_prec_storage.append(adv_stats_merger)
    return adv_stats_merger#A code for sorting average precision, which divides the value of won championship by the same row of the predicted ranking
#This code then holds that result for the prediction value.

In [109]:
sort_avg_precision(adv_stats_merger) #As shown, the prediction value was 0.333333 as it was 1/3 off from predicting the correct champion for the year of 2019

Unnamed: 0,Team,Won Championship,Prediction Value,Likely Champion Rank
735,Milwaukee Bucks,0,,1
736,Golden State Warriors,0,,2
737,Toronto Raptors,1,0.333333,3
738,Utah Jazz,0,,4
739,Houston Rockets,0,,5
740,Boston Celtics,0,,6
741,Portland Trail Blazers,0,,7
742,Denver Nuggets,0,,8
743,Indiana Pacers,0,,9
744,Oklahoma City Thunder,0,,10


In [110]:
def avg_precision(adv_stats_merger):#Merge all of the prediction values into a mean
    avg_precision = adv_stats_merger["Prediction Value"].mean()
    return avg_precision

In [111]:
avg_precision(adv_stats_merger) #For 2019, the merger just yields the singular value generated of .33333

0.3333333333333333

In [112]:
years = list(range(2018,2024))
avg_precision_mean = []#However, we then use an array to store all mean values
for year in years:
    train = advanced_league_results[advanced_league_results["Year"]< year]#Of which then are looped within each year
    test = advanced_league_results[advanced_league_results["Year"] == year]#In order to generate predictions from the last 5 years
    ridge_regression.fit(train[stats_pred], train["Won Championship"])#And then using the ridge regession to calculate the model
    prediction_value = ridge_regression.predict(test[stats_pred])
    prediction_value = pd.DataFrame(prediction_value, columns = ["Prediction Value"], index = test.index)
    adv_stats_merger = pd.concat([test[["Team", "Won Championship"]], prediction_value], axis = 1)
    adv_stats_merger = adv_stats_merger.sort_values("Prediction Value", ascending = False)#The values are sorted by predicted results
    adv_stats_merger["Likely Champion Rank"] = list(range(1,adv_stats_merger.shape[0]+1))#Of which then we store the likely champion as a category in a list with the merger
    sort_avg_precision(adv_stats_merger)#Where afterward it is calculated
    avg_precision_mean.append(avg_precision(adv_stats_merger))#And then the sum is appended for the mean

In [113]:
sum(avg_precision_mean)/len(avg_precision_mean)#We find that the sum / length is 0.8333333333333334. Which gives our model an accuracy of 0.8333333333333334.

0.8333333333333334

In [114]:
def backtest(advanced_league_results, ridge_regression, year, stats_pred): #Backtesting is then implemented to fit within our model
    years = list(range(2018,2024))
    avg_precision_mean = []
    sorted_predictions = []
    for year in years:
        train = advanced_league_results[advanced_league_results["Year"]< year]
        test = advanced_league_results[advanced_league_results["Year"] == year]
        ridge_regression.fit(train[stats_pred], train["Won Championship"])
        prediction_value = ridge_regression.predict(test[stats_pred])
        prediction_value = pd.DataFrame(prediction_value, columns = ["Prediction Value"], index=test.index)
        adv_stats_merger = pd.concat([test[["Team", "Won Championship"]], prediction_value], axis = 1)
        adv_stats_merger = adv_stats_merger.sort_values("Prediction Value", ascending = False)
        adv_stats_merger["Likely Champion Rank"] = list(range(1,adv_stats_merger.shape[0]+1))
        sorted_predictions.append(sort_avg_precision(adv_stats_merger))
        avg_precision_mean.append(avg_precision(adv_stats_merger))
    return sum(avg_precision_mean)/len(avg_precision_mean), avg_precision_mean, pd.concat(sorted_predictions) #The mean, each precision mean, and sorted predictions are returned

In [115]:
mean_ap, avg_precision_mean, sorted_predictions = backtest(advanced_league_results, ridge_regression, year, stats_pred)

In [116]:
mean_ap #There was an accurate of 0.8333333333333334 with our model.

0.8333333333333334

In [117]:
avg_precision_mean #The array of each mean has been shown below

[1.0, 0.5, 1.0, 1.0, 0.5, 1.0]

In [118]:
sorted_predictions #The predictions are then sorted for all years

Unnamed: 0,Team,Won Championship,Prediction Value,Likely Champion Rank
707,Golden State Warriors,1,1.0,1
718,Cleveland Cavaliers,0,,2
710,Boston Celtics,0,,3
705,Houston Rockets,0,,4
709,Philadelphia 76ers,0,,5
...,...,...,...,...
864,New Orleans Pelicans,0,,26
880,Portland Trail Blazers,0,,27
868,Oklahoma City Thunder,0,,28
869,Brooklyn Nets,0,,29


In [119]:
rf = RandomForestRegressor(n_estimators = 500, random_state = 5, min_samples_split=5)

mean_ap, aps, all_predictions = backtest(advanced_league_results, rf, years[24:], stats_pred) #We then use a RFM to predict the accuracy

In [120]:
mean_ap, aps, all_predictions = backtest(advanced_league_results, rf, years[24:], stats_pred) #We apply this to be backtested for each parameter from 24-30 years.

In [121]:
mean_ap #The mean_ap for rfh returns the same, with an accuracy of 0.8333333333333334!

0.8333333333333334