# Code to Adjust Offensive, Defensive and Net Ratings for Strength of Schedule

In [None]:
# Ridge regression with built-in cross-validation over the penalty strength.
from sklearn.linear_model import RidgeCV
import os, sys

# "__file__" is a string literal here, not the module variable, so abspath()
# resolves it against the current working directory. Taking dirname twice
# therefore appends the PARENT of the working directory to the import path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
# NOTE(review): pd, np, dt, datetime and the plotting names used further down
# are not imported explicitly in this file; presumably they all arrive through
# this star import -- confirm against nbafuns.
from nbafuns import *

# teams_dict is used below to map team ids to names, teams_list to fix the
# column order of the regression design matrix.
# NOTE(review): get_teams is presumably defined in nbafuns -- confirm.
teams_dict, teams_list = get_teams(league="NBA")
box_DIR = "../data/box/"  # directory of the box-score parquet files read by get_ratings
img_DIR_T = "../data/images/teams/"  # prefix used to build per-team image paths
fig_DIR = "../figs/teams/"  # not referenced by the code visible in this file

In [None]:
def get_ratings(season, enddate, window_days=21):
    """Build one trailing-window table of paired team ratings per calendar day.

    The advanced team box scores of ``season`` are read and the two rows of
    every game are paired so that each game appears twice: once from each
    team's point of view. Columns of the team itself carry the suffix ``1``
    and columns of its opponent the suffix ``2``. For every day from
    ``window_days`` after the first game date up to ``enddate`` the games
    played in the ``window_days`` days before that day are selected.

    Args:
        season: Season label interpolated into the parquet file names.
        enddate: Last day (datetime-like) for which a window is built.
        window_days: Length of the trailing window in days. It is also the
            offset between the first game date and the first window.
            Defaults to 21, the value that used to be hard-coded.

    Returns:
        A tuple ``(frames, days)``. ``days`` is the list of window end dates
        formatted as ``"%Y-%m-%d"`` and ``frames`` holds, in the same order,
        the paired ratings of the games in each window plus a boolean
        ``HOME`` column (True when ``MATCHUP`` does not contain ``"@"``).
        Both lists are empty when ``enddate`` precedes the first window.
    """
    adv_cols = [
        "gameId",
        "teamName",
        "teamId",
        "offensiveRating",
        "defensiveRating",
        "netRating",
        "possessions",
    ]
    adv = pd.read_parquet(
        box_DIR + f"NBA_Box_T_Adv_{season}.parquet", columns=adv_cols
    )
    adv.columns = ["gameId", "team", "tId", "ORtg", "DRtg", "NRtg", "poss"]

    def _suffixed(frame, suffix):
        # Tag every column except the join key with the team slot ("1"/"2").
        return frame.rename(
            columns=lambda col: col if col == "gameId" else col + suffix
        )

    # The first and second row of each game are the two participating teams.
    by_game = adv.groupby("gameId")
    first_team = by_game.nth(0)
    second_team = by_game.nth(1)

    # Emit every game in both orientations so each team shows up in slot 1.
    paired = (
        pd.concat(
            [
                pd.merge(
                    _suffixed(first_team, "1"),
                    _suffixed(second_team, "2"),
                    on="gameId",
                ),
                pd.merge(
                    _suffixed(second_team, "1"),
                    _suffixed(first_team, "2"),
                    on="gameId",
                ),
            ]
        )
        .sort_values(by="gameId")
        .reset_index(drop=True)
    )

    base = pd.read_parquet(box_DIR + f"NBA_Box_T_Base_{season}.parquet")
    game_dates = base["GAME_DATE"]
    window = dt.timedelta(days=window_days)
    # NOTE(review): takes the first row as the earliest game, i.e. assumes the
    # file is ordered by date -- confirm against the data export.
    startdate = game_dates.iloc[0] + window
    days = [
        (startdate + dt.timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range((enddate - startdate).days + 1)
    ]

    # These columns do not depend on the window, so derive them once on a new
    # frame instead of assigning onto a filtered slice inside the loop (which
    # triggered pandas' SettingWithCopyWarning).
    schedule = pd.DataFrame(
        {
            "gameId": base["GAME_ID"].astype(int),
            "tId1": base["TEAM_ID"].astype(int),
            "HOME": ~base["MATCHUP"].str.contains("@"),
        }
    )

    frames = []
    for day in days:
        day_ts = pd.to_datetime(day)
        # Games strictly before `day` and at most `window_days` days old.
        in_window = (game_dates < day_ts) & (game_dates >= day_ts - window)
        recent = (
            schedule[in_window].sort_values(by="gameId").reset_index(drop=True)
        )
        # Joins on the shared columns gameId and tId1.
        frames.append(pd.merge(paired, recent))
    return frames, days

In [None]:
def process_results(data, results_adj):
    """Combine raw possession-weighted ratings with the adjusted ratings.

    Neither input frame is modified.

    Args:
        data: One row per team-game with the columns ``tId1``, ``ORtg1``,
            ``DRtg1`` and ``poss1``.
        results_adj: Adjusted ratings with the columns ``tId``, ``Team``,
            ``aOFF``, ``aDEF`` and ``aNET``. ``tId`` may be stored as float;
            it is cast to int for the join.

    Returns:
        A tuple ``(results, ortg_mean, drtg_mean)``:

        * ``results`` has one row per team present in both inputs with the
          columns ``Team, OFF, oSOS, aOFF, DEF, dSOS, aDEF, NET, SOS, aNET``,
          rounded to two decimals and sorted by ``aNET`` descending.
          ``oSOS = aOFF - OFF``, ``dSOS = DEF - aDEF`` and
          ``SOS = oSOS + dSOS``.
        * ``ortg_mean`` / ``drtg_mean`` are the possession-weighted means of
          ``ORtg1`` / ``DRtg1`` over all rows of ``data``.
    """
    # Work on a private frame so the caller's `data` gains no helper columns.
    poss = data["poss1"]
    weighted = pd.DataFrame(
        {
            "tId1": data["tId1"],
            "poss1": poss,
            "pts1": data["ORtg1"] * poss,
            "pts2": data["DRtg1"] * poss,
        }
    )

    # Possession-weighted average rating per team = sum(rating * poss) / sum(poss).
    totals = (
        weighted.groupby("tId1")[["poss1", "pts1", "pts2"]].sum().reset_index()
    )
    results_net = pd.DataFrame(
        {
            "tId": totals["tId1"],
            "OFF": totals["pts1"] / totals["poss1"],
            "DEF": totals["pts2"] / totals["poss1"],
        }
    )
    results_net["NET"] = results_net["OFF"] - results_net["DEF"]
    results_net = results_net.astype(float).round(2)
    results_net["tId"] = results_net["tId"].astype(int)

    total_poss = weighted["poss1"].sum()
    ortg_mean = weighted["pts1"].sum() / total_poss
    drtg_mean = weighted["pts2"].sum() / total_poss

    # Cast the join key on a copy instead of rewriting the caller's column.
    adjusted = results_adj.assign(tId=results_adj["tId"].astype(int))
    results_comb = pd.merge(results_net, adjusted, on=["tId"])
    results_comb["oSOS"] = results_comb["aOFF"] - results_comb["OFF"]
    results_comb["dSOS"] = results_comb["DEF"] - results_comb["aDEF"]
    results_comb["SOS"] = results_comb["oSOS"] + results_comb["dSOS"]

    out_cols = [
        "Team",
        "OFF",
        "oSOS",
        "aOFF",
        "DEF",
        "dSOS",
        "aDEF",
        "NET",
        "SOS",
        "aNET",
    ]
    # DataFrame.round leaves the non-numeric Team column untouched.
    results = results_comb[out_cols].round(2)
    results = results.sort_values(by="aNET", ascending=False).reset_index(
        drop=True
    )
    return results, ortg_mean, drtg_mean

In [None]:
def map_teams(row_in, teams, scale):
    """Encode one pair of team ids as a row of the regression design matrix.

    Args:
        row_in: Sequence whose first two entries are team ids.
        teams: List of all team ids; its order defines the column order.
        scale: Value written into the two active columns.

    Returns:
        A 1-D float array of length ``2 * len(teams)`` that is zero except
        for ``scale`` at the position of ``row_in[0]`` in the first half and
        at the position of ``row_in[1]`` in the second half.

    Raises:
        ValueError: If either id is not contained in ``teams``.
    """
    n_teams = len(teams)
    first_id, second_id = row_in[0], row_in[1]

    first_col = teams.index(first_id)
    second_col = n_teams + teams.index(second_id)

    encoded = np.zeros(2 * n_teams)
    encoded[first_col] = scale
    encoded[second_col] = scale
    return encoded


def convert_to_matricies(possessions, name, teams, scale=1):
    """Turn a table of team pairings into a design matrix and target vector.

    Args:
        possessions: DataFrame with the id columns ``tId1`` and ``tId2`` and
            the target column ``name``.
        name: Name of the column used as regression target.
        teams: Sequence of all team ids; its order defines the column order.
        scale: Value written into the two active columns of every row.

    Returns:
        A tuple ``(X, y)``. ``X`` is a float array of shape
        ``(len(possessions), 2 * len(teams))``: row ``i`` holds ``scale`` in
        the column of ``tId1`` within the first half and in the column of
        ``tId2`` within the second half, zeros elsewhere. ``y`` is the
        ``name`` column as a NumPy array. An empty input yields an ``X`` with
        zero rows instead of raising.

    Raises:
        ValueError: If an id in ``tId1``/``tId2`` is not contained in
            ``teams``.
    """
    n_teams = len(teams)
    # Map id -> column once; setdefault keeps the first position of a
    # repeated id, matching list.index semantics.
    column_of = {}
    for position, team_id in enumerate(teams):
        column_of.setdefault(team_id, position)

    pairs = possessions[["tId1", "tId2"]].to_numpy()
    n_rows = len(pairs)

    try:
        first_cols = [column_of[team_id] for team_id in pairs[:, 0]]
        second_cols = [n_teams + column_of[team_id] for team_id in pairs[:, 1]]
    except KeyError as err:
        # Keep the exception type of the former list.index lookup.
        raise ValueError(f"{err.args[0]!r} is not in teams") from err

    stint_X_rows = np.zeros((n_rows, 2 * n_teams))
    rows = np.arange(n_rows)
    # Explicit integer dtype so the empty case is still a valid index array.
    stint_X_rows[rows, np.asarray(first_cols, dtype=np.intp)] = scale
    stint_X_rows[rows, np.asarray(second_cols, dtype=np.intp)] = scale

    stint_Y_rows = possessions[name].to_numpy()
    return stint_X_rows, stint_Y_rows


# Convert lambda value to alpha needed for ridge CV


def lambda_to_alpha(lambda_value, samples):
    """Return the alpha that corresponds to ``lambda_value``.

    The conversion is ``alpha = lambda_value * samples / 2``, where
    ``samples`` is the number of training rows.
    """
    scaled = lambda_value * samples
    return scaled / 2.0


# Convert RidgeCV alpha back into a lambda value


def alpha_to_lambda(alpha_value, samples):
    """Return the lambda that corresponds to ``alpha_value``.

    Inverse of ``lambda_to_alpha``: ``lambda = alpha_value * 2 / samples``,
    where ``samples`` is the number of training rows.
    """
    doubled = alpha_value * 2.0
    return doubled / samples


def calculate_netrtg(train_x, train_y, lambdas, teams_list):
    """Fit a cross-validated ridge regression and tabulate team coefficients.

    Args:
        train_x: Design matrix with ``2 * len(teams_list)`` columns.
        train_y: Target vector, one value per row of ``train_x``.
        lambdas: Candidate penalties; each is converted to a RidgeCV alpha
            with ``lambda_to_alpha`` using the number of training rows.
        teams_list: Team ids in the column order of ``train_x``.

    Returns:
        A tuple ``(results, model, intercept)``. ``results`` has the columns
        ``tId, Team, aOFF, aDEF, aNET`` sorted by ``aNET`` descending, where
        ``aNET`` is the difference of the raw coefficients and ``aOFF`` /
        ``aDEF`` are the coefficients shifted by the fitted intercept.
        ``model`` is the fitted RidgeCV estimator.
    """
    n_teams = len(teams_list)
    n_samples = train_x.shape[0]
    alphas = [lambda_to_alpha(penalty, n_samples) for penalty in lambdas]

    # 5-fold CV over the alphas. The target is not centred at zero, so an
    # intercept is fitted as well.
    model = RidgeCV(alphas=alphas, cv=5, fit_intercept=True).fit(train_x, train_y)
    intercept = model.intercept_

    # The first n_teams coefficients are treated as the offensive part, the
    # following n_teams as the defensive part.
    offense_coef = model.coef_[:n_teams]
    defense_coef = model.coef_[n_teams : 2 * n_teams]

    # One row per team: id, offensive coefficient, defensive coefficient.
    table = np.column_stack([np.array(teams_list), offense_coef, defense_coef])
    teams_coef = pd.DataFrame(table, columns=["tId", "aOFF", "aDEF"])

    # Net rating is taken from the raw coefficients, before the intercept
    # shifts both components.
    teams_coef["aNET"] = teams_coef["aOFF"] - teams_coef["aDEF"]
    teams_coef["aOFF"] = teams_coef["aOFF"] + intercept
    teams_coef["aDEF"] = teams_coef["aDEF"] + intercept
    # teams_dict is the module-level id -> name mapping.
    teams_coef["Team"] = teams_coef["tId"].map(teams_dict)

    ordered = teams_coef[["tId", "Team", "aOFF", "aDEF", "aNET"]]
    results = ordered.sort_values(by=["aNET"], ascending=False).reset_index(drop=True)
    return results, model, intercept

In [None]:
# Season label; get_ratings interpolates it into the parquet file names.
season = 2024
# Kept as a datetime (not a formatted string) because get_ratings subtracts
# dates from it to count the days to cover.
enddate = datetime.today()
# datal: one frame per day, dates: the matching "%Y-%m-%d" strings.
datal,dates = get_ratings(season,enddate)

In [None]:
# Sanity check on the most recent window: get_ratings emits every game once
# per team, so half the row count is the number of games in the window.
datar = datal[-1]
len(datar)/2

In [None]:
# Fit the ridge model on every trailing window and collect the rated,
# ranked teams of each window in one long table (res_all).

# The candidate penalties are identical for every window.
n = 1.5/2
lambdas_net = [0.001 * n, 0.005 * n, 0.01 * n]

# (rating column, rank ascending?) -- for defensive ratings the lowest value
# gets rank 1, for every other rating the highest value does.
rank_specs = [
    ("OFF", False),
    ("DEF", True),
    ("NET", False),
    ("aOFF", False),
    ("aDEF", True),
    ("aNET", False),
]

dfa = []
for datar, dd in zip(datal, dates):
    # Target is the offensive rating of team 1 in each row.
    train_x, train_y = convert_to_matricies(datar, "ORtg1", teams_list, scale=1 / 2)
    results_adj, model, intercept = calculate_netrtg(
        train_x, train_y, lambdas_net, teams_list
    )
    results, ortg_mean, drtg_mean = process_results(datar, results_adj)
    for rating_col, rank_ascending in rank_specs:
        # method="first" breaks ties by row order so ranks stay unique ints.
        results[rating_col + "_R"] = (
            results[rating_col]
            .rank(ascending=rank_ascending, method="first")
            .astype(int)
        )
    results["Date"] = pd.to_datetime(dd)
    dfa.append(results)
res_all = pd.concat(dfa)

In [None]:
# Display the concatenated per-window results (one row per team and date).
res_all

In [None]:
# Enrich the ranked results with the per-team metadata from the CSV.
df_teams = pd.read_csv("../data/NBA_teams_colors_logos.csv")
# merge() without `on` joins on every column the two frames share.
# NOTE(review): presumably that is just "Team" -- confirm against the CSV.
data = res_all.merge(df_teams).assign(
    # Path of the team's image file.
    image=lambda frame: img_DIR_T + frame["Team"] + ".png",
    # Text label such as "#3" built from the adjusted net-rating rank.
    label=lambda frame: "#" + frame["aNET_R"].astype(str),
)
today = datetime.today().strftime("%B %d, %Y")

In [None]:
# Team whose rank history is plotted in the cells below.
team = "Memphis Grizzlies"
# Keep only this team's rows; the name is interpolated into the query string,
# so it must not contain a single quote.
dft = data.query(f"Team == '{team}'")
# Number of rows (one per window date) available for the plots.
len(dft)

## Adjusted Offense Rank

In [None]:
# Rank history of the selected team's schedule-adjusted offense.
# The y aesthetic uses the adjusted rank column (aOFF_R) so the figure matches
# its title and the sibling defense / net plots; it previously plotted the
# unadjusted OFF_R.
# NOTE(review): theme_idv and pnba are not defined in this file; presumably a
# shared theme and a branding element from nbafuns -- confirm.
mapping = aes(x="Date", y="aOFF_R", group=0)
grid_theme = theme(
    figure_size=(8, 4.5),
    panel_grid_major_x=element_line(linetype="dotted", size=0.2),
    panel_grid_minor_x=element_line(linetype="dotted", size=0.2),
)
plot_labels = labs(
    title=f"{team}: Adjusted Offense Rank",
    subtitle="Rating adjusted for strength of schedule (3 weeks)",
    y="Offense Rank",
)
p = (
    ggplot(dft, mapping)
    + geom_step()
    + geom_point()
    + geom_smooth(se=False, span=0.4, color="blue")
    # Reversed axis so rank 1 is drawn at the top.
    + scale_y_reverse(limits=(0, 30))
    + scale_x_date(date_labels="%b-%d", date_breaks="1 month")
    + theme_idv
    + grid_theme
    + plot_labels
    + pnba
)
p

## Adjusted Defense Rank

In [None]:
# Rank history of the selected team's schedule-adjusted defense (aDEF_R).
# NOTE(review): theme_idv and pnba are not defined in this file; presumably a
# shared theme and a branding element from nbafuns -- confirm.
mapping = aes(x="Date", y="aDEF_R", group=0)
grid_theme = theme(
    figure_size=(8, 4.5),
    panel_grid_major_x=element_line(linetype="dotted", size=0.2),
    panel_grid_minor_x=element_line(linetype="dotted", size=0.2),
)
plot_labels = labs(
    title=f"{team}: Adjusted Defense Rank",
    subtitle="Rating adjusted for strength of schedule (3 weeks)",
    y="Defense Rank",
)
p = (
    ggplot(dft, mapping)
    + geom_step()
    + geom_point()
    + geom_smooth(se=False, span=0.4, color="blue")
    # Reversed axis so rank 1 is drawn at the top.
    + scale_y_reverse(limits=(0, 30))
    + scale_x_date(date_labels="%b-%d", date_breaks="1 month")
    + theme_idv
    + grid_theme
    + plot_labels
    + pnba
)
p

In [None]:
# Rank history of the selected team's schedule-adjusted net rating (aNET_R).
# NOTE(review): theme_idv and pnba are not defined in this file; presumably a
# shared theme and a branding element from nbafuns -- confirm.
mapping = aes(x="Date", y="aNET_R", group=0)
grid_theme = theme(
    figure_size=(8, 4.5),
    panel_grid_major_x=element_line(linetype="dotted", size=0.2),
    panel_grid_minor_x=element_line(linetype="dotted", size=0.2),
)
plot_labels = labs(
    title=f"{team}: Adjusted Net Rating Rank",
    subtitle="Rating adjusted for strength of schedule (3 weeks)",
    y="Net Rating Rank",
)
p = (
    ggplot(dft, mapping)
    + geom_step()
    + geom_point()
    + geom_smooth(se=False, span=0.4, color="blue")
    # Reversed axis so rank 1 is drawn at the top.
    + scale_y_reverse(limits=(0, 30))
    + scale_x_date(date_labels="%b-%d", date_breaks="1 month")
    + theme_idv
    + grid_theme
    + plot_labels
    + pnba
)
p