# Code to Adjust Offensive, Defensive and Net Ratings for Strength of Schedule
## Experiments: adding rest (time-decay) and home-court priors before the ridge adjustment

In [None]:
from sklearn.linear_model import RidgeCV
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from nbafuns import *

# from sklearn.pipeline import make_pipeline
# model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

# Team lookup tables (get_teams is presumably provided by nbafuns via the star import).
# teams_dict is used further down to map a team id to its name; teams_list fixes the
# order of the team columns in the regression design matrix.
teams_dict, teams_list = get_teams(league="NBA")
# Path prefix for the per-season box-score parquet files read by get_ratings.
box_DIR = "../data/box/"

In [None]:
def time_decay(X):
    """Return the exponentially decayed count of earlier entries for each value.

    For position ``i`` the result is ``sum(exp(-(X[i] - X[j])) for j < i)``,
    so the first entry is always 0 and an entry that closely follows previous
    ones (small gaps) gets a larger value.

    Parameters
    ----------
    X : array-like
        One-dimensional numeric values (pandas Series, list or ndarray) in
        the order in which they should be processed.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``X``.
    """
    values = np.asarray(X, dtype=float)
    # gaps[i, j] = X[i] - X[j] for every pair of positions.
    gaps = values[:, None] - values[None, :]
    # Only strictly earlier positions (j < i) contribute to entry i.
    earlier = np.tri(values.size, k=-1, dtype=bool)
    # Excluded pairs get an infinite gap so they contribute exp(-inf) == 0
    # without ever evaluating exp() on a large positive argument.
    return np.exp(-np.where(earlier, gaps, np.inf)).sum(axis=1)

In [None]:
def _suffix_columns(frame, suffix):
    """Return a copy of ``frame`` with ``suffix`` appended to every column but ``gameId``."""
    return frame.rename(
        columns=lambda col: col if col == "gameId" else f"{col}{suffix}"
    )


def _per_possession_ratings(frame):
    """Return possession-weighted (ORtg1, DRtg1) averages of ``frame``, rounded to 3 places."""
    poss = frame["poss1"]
    total_poss = poss.sum()
    off = ((frame["ORtg1"] * poss).sum() / total_poss).round(3)
    dfn = ((frame["DRtg1"] * poss).sum() / total_poss).round(3)
    return off, dfn


def get_ratings(season=2024, rest=1):
    """Build the two-rows-per-game rating table, including a ``Prior`` column.

    Reads the advanced and base team box scores of ``season`` from ``box_DIR``,
    attaches the game date and home flag to every team-game, computes a
    per-team ``rest`` value with ``time_decay`` over the day offsets of that
    team's games, and pairs the two teams of each game in both orders
    (columns suffixed ``1`` and ``2``).

    Parameters
    ----------
    season : int or str
        Season tag interpolated into the parquet file names.
    rest : float
        Weight applied to ``rest2 - rest1`` inside ``Prior``.

    Returns
    -------
    pandas.DataFrame
        Paired rows sorted by ``date1`` with a fresh index. ``Prior`` is
        ``rest * (rest2 - rest1)`` plus/minus the home offensive edge
        (sign from ``home1``) plus/minus the home defensive edge (sign from
        ``home2``), rounded to 3 places.
    """
    adv_cols = [
        "gameId",
        "teamName",
        "teamId",
        "offensiveRating",
        "defensiveRating",
        "netRating",
        "possessions",
    ]
    adv = pd.read_parquet(
        box_DIR + f"NBA_Box_T_Adv_{season}.parquet", columns=adv_cols
    )
    adv.columns = ["gameId", "team", "tId", "ORtg", "DRtg", "NRtg", "poss"]

    base = pd.read_parquet(box_DIR + f"NBA_Box_T_Base_{season}.parquet")
    schedule = pd.DataFrame(
        {
            "date": base["GAME_DATE"],
            "gameId": base["GAME_ID"],
            "tId": base["TEAM_ID"],
            # A matchup string containing "@" marks the away side.
            "home": ~base["MATCHUP"].str.contains("@"),
        }
    )
    schedule = schedule.sort_values(by=["date", "gameId", "tId", "home"]).reset_index(
        drop=True
    )
    schedule["gameId"] = schedule["gameId"].astype(int)

    # Natural join on the shared columns (gameId, tId).
    games = pd.merge(adv, schedule).sort_values(by=["date"])
    # Days elapsed since the earliest game in the table.
    games["date_d"] = (games["date"] - games["date"].iloc[0]).dt.days

    by_team = games.groupby("tId")
    per_team = []
    for team_id in by_team.groups:
        # Copy so the new column is written to an independent frame rather
        # than to a slice of ``games``.
        team_games = by_team.get_group(team_id).copy()
        team_games["rest"] = np.round(time_decay(team_games["date_d"]), 5)
        per_team.append(team_games)

    # NOTE(review): the pairing below needs ``nth`` to return rows that keep
    # ``gameId`` available for the merge (pandas 2.x behaviour) - confirm the
    # pandas version in use.
    by_game = pd.concat(per_team).groupby("gameId")
    first = by_game.nth(0)
    second = by_game.nth(1)
    # Each game is emitted from both teams' points of view.
    first_as_1 = pd.merge(
        _suffix_columns(first, "1"), _suffix_columns(second, "2"), on="gameId"
    )
    second_as_1 = pd.merge(
        _suffix_columns(second, "1"), _suffix_columns(first, "2"), on="gameId"
    )
    df8 = (
        pd.concat([first_as_1, second_as_1])
        .sort_values(by="date1")
        .reset_index(drop=True)
    )

    # Home edge = home-row average minus all-row average, per possession.
    off_all, def_all = _per_possession_ratings(df8)
    off_home, def_home = _per_possession_ratings(df8.query("home1"))
    off_adv = off_home - off_all
    def_adv = def_home - def_all

    df8["Prior"] = (
        -rest * df8["rest1"]
        + rest * df8["rest2"]
        + np.where(df8["home1"], 1, -1) * off_adv
        + np.where(df8["home2"], 1, -1) * def_adv
    ).round(3)

    return df8

In [None]:
def process_results(data, results_adj):
    """Combine raw per-team ratings with the adjusted ones and derive schedule strength.

    Side effects: adds ``pts1`` / ``pts2`` columns to ``data`` and casts
    ``results_adj["tId"]`` to int, both in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``tId1``, ``ORtg1``, ``DRtg1`` and ``poss1`` columns.
    results_adj : pandas.DataFrame
        Needs ``tId``, ``Team``, ``aOFF``, ``aDEF`` and ``aNET`` columns.

    Returns
    -------
    tuple
        ``(results, ortg_mean, drtg_mean)``: the per-team table sorted by
        ``aNET`` descending, and the possession-weighted means of ``ORtg1``
        and ``DRtg1`` over all rows of ``data``.
    """
    data["pts1"] = data["ORtg1"] * data["poss1"]
    data["pts2"] = data["DRtg1"] * data["poss1"]

    # Possession-weighted raw ratings per team.
    totals = data.groupby(["tId1"])[["poss1", "pts1", "pts2"]].sum().reset_index()
    results_net = pd.DataFrame(
        {
            "tId": totals["tId1"],
            "OFF": totals["pts1"] / totals["poss1"],
            "DEF": totals["pts2"] / totals["poss1"],
        }
    )
    results_net["NET"] = results_net["OFF"] - results_net["DEF"]
    results_net = results_net.astype(float).round(2)
    results_net["tId"] = results_net["tId"].astype(int)

    all_poss = data["poss1"].sum()
    ortg_mean = data["pts1"].sum() / all_poss
    drtg_mean = data["pts2"].sum() / all_poss

    results_adj["tId"] = results_adj["tId"].astype(int)
    results_comb = pd.merge(results_net, results_adj, on=["tId"])

    # Schedule strength = what the adjustment added on offence plus what it
    # took off on defence.
    off_shift = results_comb["aOFF"] - results_comb["OFF"]
    def_shift = results_comb["DEF"] - results_comb["aDEF"]
    results_comb["oSOS"] = off_shift
    results_comb["dSOS"] = def_shift
    results_comb["SOS"] = results_comb["oSOS"] + results_comb["dSOS"]

    # Round everything after the id column to one decimal.
    results_comb.iloc[:, 1:] = results_comb.iloc[:, 1:].round(1)

    display_cols = [
        "Team", "OFF", "oSOS", "aOFF", "DEF", "dSOS", "aDEF", "NET", "SOS", "aNET",
    ]
    results = (
        results_comb[display_cols]
        .sort_values(by="aNET", ascending=False)
        .reset_index(drop=True)
    )
    return results, ortg_mean, drtg_mean

In [None]:
def map_teams(row_in, teams, scale):
    """Encode a pair of teams as one row of length ``2 * len(teams)``.

    The position of ``row_in[0]`` in ``teams`` is set to ``scale`` in the first
    half of the row, and the position of ``row_in[1]`` is set to ``scale`` in
    the second half; every other entry is 0.
    """
    n_teams = len(teams)
    first_team, second_team = row_in[0], row_in[1]

    encoded = np.zeros(2 * n_teams)
    encoded[teams.index(first_team)] = scale
    encoded[n_teams + teams.index(second_team)] = scale
    return encoded


def convert_to_matricies(possessions, name, teams, prior, scale=1):
    """Build the regression design matrix and the prior-corrected target vector.

    Parameters
    ----------
    possessions : pandas.DataFrame
        Needs ``tId1`` and ``tId2`` columns plus the target column ``name``.
    name : str
        Column of ``possessions`` holding the target values.
    teams : sequence
        Team ids; a team's position here is its column in the first half of
        the matrix (for ``tId1``) and, offset by ``len(teams)``, in the second
        half (for ``tId2``).
    prior : array-like
        Values subtracted element-wise from the target column.
    scale : float, default 1
        Value written into the two active columns of each row.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` has shape ``(len(possessions), 2 * len(teams))``
        and ``y`` is ``possessions[name] - prior``.

    Raises
    ------
    ValueError
        If a team id in ``possessions`` is not present in ``teams``.
    """
    team_ids = possessions[["tId1", "tId2"]].to_numpy()
    n_rows = len(team_ids)
    n_teams = len(teams)

    # One hash lookup per id instead of a linear ``list.index`` scan per row;
    # ``setdefault`` keeps the first position if ``teams`` repeats an id.
    column_of = {}
    for col, team in enumerate(teams):
        column_of.setdefault(team, col)

    try:
        first_cols = np.array(
            [column_of[team] for team in team_ids[:, 0]], dtype=np.intp
        )
        second_cols = (
            np.array([column_of[team] for team in team_ids[:, 1]], dtype=np.intp)
            + n_teams
        )
    except KeyError as err:
        # Keep the exception type that ``list.index`` raised before.
        raise ValueError(f"{err.args[0]!r} is not in list") from None

    stint_X_rows = np.zeros((n_rows, 2 * n_teams))
    row_idx = np.arange(n_rows)
    stint_X_rows[row_idx, first_cols] = scale
    stint_X_rows[row_idx, second_cols] = scale

    # Regress on what the prior does not already explain.
    stint_Y_rows = possessions[name].to_numpy() - prior

    return stint_X_rows, stint_Y_rows


# Convert lambda value to alpha needed for ridge CV


def lambda_to_alpha(lambda_value, samples):
    """Return ``lambda_value * samples / 2``, the alpha matching a lambda for ``samples`` rows."""
    scaled = lambda_value * samples
    return scaled / 2.0


# Convert RidgeCV alpha back into a lambda value


def alpha_to_lambda(alpha_value, samples):
    """Return ``alpha_value * 2 / samples``, the inverse of ``lambda_to_alpha``."""
    doubled = alpha_value * 2.0
    return doubled / samples


def calculate_netrtg(train_x, train_y, lambdas, teams_list, prior):
    """Fit a cross-validated ridge regression and return per-team adjusted ratings.

    Parameters
    ----------
    train_x, train_y
        Design matrix and target vector; the first ``len(teams_list)`` columns
        of ``train_x`` are read back as offensive coefficients and the next
        ``len(teams_list)`` as defensive ones.
    lambdas : iterable of float
        Candidate penalties, converted with ``lambda_to_alpha`` using the
        number of rows of ``train_x``.
    teams_list : list
        Team ids in the column order of ``train_x``.
    prior
        Accepted for call compatibility; not used inside this function.

    Returns
    -------
    tuple
        ``(results, model, intercept)``. ``results`` has columns ``tId``,
        ``Team``, ``aOFF``, ``aDEF``, ``aNET`` sorted by ``aNET`` descending.
        ``aNET`` is the difference of the raw coefficients, while ``aOFF`` and
        ``aDEF`` have the fitted intercept added.
    """
    n_samples = train_x.shape[0]
    n_teams = len(teams_list)
    alphas = [lambda_to_alpha(lam, n_samples) for lam in lambdas]

    # 5-fold cross-validated ridge; the target is not centred at 0, so an
    # intercept is fitted.
    model = RidgeCV(alphas=alphas, cv=5, fit_intercept=True).fit(train_x, train_y)
    intercept = model.intercept_

    # Split the coefficient vector into its offensive and defensive halves and
    # lay them out next to the team ids as an (n_teams x 3) table.
    weights = model.coef_
    team_table = np.column_stack(
        [teams_list, weights[:n_teams], weights[n_teams : 2 * n_teams]]
    )
    teams_coef = pd.DataFrame(team_table, columns=["tId", "aOFF", "aDEF"])

    # Net rating is taken before the intercept is added to either side.
    teams_coef["aNET"] = teams_coef["aOFF"] - teams_coef["aDEF"]
    teams_coef["aOFF"] += intercept
    teams_coef["aDEF"] += intercept
    teams_coef["Team"] = teams_coef["tId"].map(teams_dict)

    ordered = teams_coef[["tId", "Team", "aOFF", "aDEF", "aNET"]]
    results = ordered.sort_values(by=["aNET"], ascending=False).reset_index(drop=True)
    return results, model, intercept

In [None]:
# seasons = np.arange(2010,2024,1).astype(str)
# dfa = []
# for season in seasons:
#     df = get_ratings(season)
#     dfa.append(df)
# data = pd.concat(dfa)

In [None]:
# Build the paired game table for the 2023 season with a rest weight of 10.
data = get_ratings(2023, rest=10)
prior = data["Prior"].to_numpy()
# Target is ORtg1 minus the prior; each active team column is entered as 1/2.
train_x, train_y = convert_to_matricies(data, "ORtg1", teams_list, prior, scale = 1/2)
# Multiplier applied to every candidate penalty below.
n = 1.5
lambdas_net = [0.001 * n, 0.005 * n, 0.01 * n]
results_adj, model, intercept = calculate_netrtg(
    train_x, train_y, lambdas_net, teams_list, prior
)
# Merge the adjusted ratings with the raw ones and derive the SOS columns.
results, ortg_mean, drtg_mean = process_results(data, results_adj)
print(intercept)
# Last expression of the cell: teams ordered from lowest to highest SOS.
results[["Team","NET","aNET","SOS"]].sort_values("SOS",ascending=True)

In [None]:
# NOTE(review): `xcvxcv` is not defined anywhere in view, so evaluating it raises
# NameError. Presumably a deliberate stop so "Run All" halts here - confirm, or remove.
xcvxcv

In [None]:
# Scatter one raw rating against its adjusted counterpart and annotate r^2.
var1 = "NET"
# var1 = "OFF"
# var1 = "DEF"
var2 = "a" + var1
# NOTE: this rebinds the module-level `intercept` set by the ridge-fit cell.
slope, intercept, r, p, sterr = scipy.stats.linregress(x=results[var1], y=results[var2])
r2 = r**2
fig, ax = plt.subplots(1, 1)
# NOTE: `fig` is rebound to whatever sns.regplot returns; the Figure created
# above is no longer reachable through this name.
fig = sns.regplot(
    x=var1,
    y=var2,
    data=results,
    color="black",
    scatter_kws={"color": "tab:blue"},
    ax=ax,
)
# Text position is in axes-fraction coordinates (transform=ax.transAxes).
ax.text(0.05, 0.9, r"$r^2=$" + f"{round(r2,4)}", transform=ax.transAxes)
# NOTE(review): title and output file name are fixed to the NET case even when
# var1 is switched to "OFF" or "DEF" above.
ax.set_title("Adjusted Net Ratings vs Unadjusted")
plt.savefig("../figs/team_leaders/aNET_R2_1.png")
plt.show()

In [None]:
# Integer rank of every team on each raw and adjusted rating. OFF and NET are
# ranked descending (highest value gets rank 1); DEF is ranked ascending
# (lowest value gets rank 1).
# method="min" gives tied teams the same whole-number rank. The default
# "average" yields fractional ranks for ties (e.g. 4.5), which astype(int)
# would silently truncate into misleading positions.
results["OFF_R"] = results["OFF"].rank(ascending=False, method="min").astype(int)
results["DEF_R"] = results["DEF"].rank(ascending=True, method="min").astype(int)
results["NET_R"] = results["NET"].rank(ascending=False, method="min").astype(int)
results["aOFF_R"] = results["aOFF"].rank(ascending=False, method="min").astype(int)
results["aDEF_R"] = results["aDEF"].rank(ascending=True, method="min").astype(int)
results["aNET_R"] = results["aNET"].rank(ascending=False, method="min").astype(int)

In [None]:
# Load the team colours/logos table and align its team-name column with `results`.
df_teams = pd.read_csv("../data/NBA_teams_colors_logos.csv")
df_teams = df_teams.rename(columns={"nameTeam": "Team"})
# No `on=` given, so the merge joins on every column the two frames share
# (presumably just "Team" - verify against the CSV header).
results_plot = pd.merge(results, df_teams)