# Home Court Advantage Analysis

In [None]:
from sklearn.linear_model import RidgeCV
import os, sys

# Make the parent of the current working directory importable so the local
# `nbafuns` module can be found. Note that "__file__" is a string literal here,
# not the variable: os.path.abspath resolves it against the working directory,
# so the two dirname() calls end up at the working directory's parent.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
# NOTE(review): this star import is presumably what supplies `pd`,
# `leaguegamelog` and `get_teams` used below -- none of them is imported
# explicitly in this notebook. Confirm against nbafuns.
from nbafuns import *

# Unused alternative model setup, kept for reference:
# from sklearn.pipeline import make_pipeline
# model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

# Team lookup structures for the NBA; `teams_dict` is used further down to map
# team ids to a "Team" column.
teams_dict, teams_list = get_teams(league="NBA")
# Folder holding the per-season advanced box score CSVs read by get_ratings().
data_DIR = "../data/boxscore_team/"

In [None]:
def get_ratings(season=2023):
    """Build a per-team, per-game ratings table with a home/away flag.

    Reads ``NBA_BoxScores_Adv_{season}.csv`` from ``data_DIR``, pairs the two
    team rows of every game so that each game yields two output rows (one from
    each team's point of view), and attaches a ``HOME`` flag taken from the
    league game log.

    Parameters
    ----------
    season : int, default 2023
        Used in the CSV file name and passed unchanged as the ``season``
        argument of ``leaguegamelog.LeagueGameLog``.

    Returns
    -------
    pandas.DataFrame
        Columns ``gameId, team1, team2, tId1, tId2, ORtg1, ORtg2, DRtg1, DRtg2,
        NRtg1, NRtg2, poss1, poss2, HOME``. Suffix ``1`` is the row's own team,
        suffix ``2`` is the other team of the same game. ``HOME`` is True when
        the own team's ``MATCHUP`` string does not contain ``"@"``. Only
        (gameId, tId1) pairs present in the game log are kept.

    Raises
    ------
    ValueError
        If any game in the CSV does not have exactly two team rows.
    """
    # CSV column -> short stem used for the paired output columns.
    short_names = {
        "teamName": "team",
        "teamId": "tId",
        "offensiveRating": "ORtg",
        "defensiveRating": "DRtg",
        "netRating": "NRtg",
        "possessions": "poss",
    }
    box = pd.read_csv(data_DIR + f"NBA_BoxScores_Adv_{season}.csv")
    # Explicit copy: we add a helper column below and must not write into a
    # slice of the frame returned by read_csv.
    box = box[["gameId", *short_names]].copy()

    # The pairing below only makes sense for exactly two rows per game.
    rows_per_game = box.groupby("gameId").size()
    bad_games = rows_per_game[rows_per_game != 2]
    if not bad_games.empty:
        raise ValueError(
            "Expected exactly two team rows per game; offending gameIds: "
            f"{bad_games.index.tolist()[:10]}"
        )

    # Position of each row inside its game (0 or 1); used to drop self-pairs.
    box["_slot"] = box.groupby("gameId").cumcount()

    def _suffixed(side):
        """Return `box` with every non-key column renamed to `<stem><side>`."""
        mapping = {src: f"{stem}{side}" for src, stem in short_names.items()}
        mapping["_slot"] = f"_slot{side}"
        return box.rename(columns=mapping)

    # Self-join on gameId gives all 2x2 combinations per game; keeping only the
    # rows where the two slots differ leaves (A, B) and (B, A). This keeps the
    # original dtypes and avoids round-tripping values through strings.
    paired = _suffixed(1).merge(_suffixed(2), on="gameId")
    paired = paired[paired["_slot1"] != paired["_slot2"]]

    stat_cols = [f"{stem}{side}" for stem in short_names.values() for side in (1, 2)]
    paired = paired[["gameId", *stat_cols]]

    # team1/team2 stay as read; ids become int, all rating/possession columns float.
    dtypes = {col: float for col in stat_cols[4:]}
    dtypes.update(tId1=int, tId2=int)
    paired = paired.astype(dtypes).sort_values(by="gameId").reset_index(drop=True)

    game_log = leaguegamelog.LeagueGameLog(
        player_or_team_abbreviation="T",
        season=season,
        season_type_all_star="Regular Season",
    ).get_data_frames()[0]
    home_flags = pd.DataFrame(
        {
            "gameId": game_log["GAME_ID"].astype(int),
            "tId1": game_log["TEAM_ID"].astype(int),
            # A matchup string containing "@" marks the row's team as the visitor.
            "HOME": ~game_log["MATCHUP"].str.contains("@"),
        }
    )
    return pd.merge(paired, home_flags, on=["gameId", "tId1"])

In [None]:
# Ratings table for the 2023 season, including the HOME flag per team-game row.
data = get_ratings(season=2023)

In [None]:
# Split the team-game rows by venue: df1 holds home rows, df2 holds away rows.
is_home = data["HOME"]
df1 = data.loc[is_home].reset_index(drop=True)
df2 = data.loc[~is_home].reset_index(drop=True)

In [None]:
def team_net_ratings(games):
    """Aggregate team-game rows into one possession-weighted rating row per team.

    Parameters
    ----------
    games : pandas.DataFrame
        Must contain ``tId1``, ``ORtg1``, ``DRtg1`` and ``poss1``.

    Returns
    -------
    pandas.DataFrame
        Columns ``tId, OFF, DEF, poss, NET`` with one row per ``tId1`` value.
        ``OFF``/``DEF`` are the ``poss1``-weighted means of ``ORtg1``/``DRtg1``,
        ``poss`` is the summed ``poss1`` and ``NET`` is ``OFF - DEF``. All value
        columns are rounded to 2 decimals (``NET`` is computed before rounding).
    """
    # Work on local Series so the caller's frame is not modified.
    poss = games["poss1"].astype(float)
    totals = (
        pd.DataFrame(
            {
                "tId": games["tId1"],
                "poss": poss,
                "pts_for": games["ORtg1"].astype(float) * poss,
                "pts_against": games["DRtg1"].astype(float) * poss,
            }
        )
        .groupby("tId", as_index=False)
        .sum()
    )
    ratings = pd.DataFrame(
        {
            "tId": totals["tId"].astype(int),
            "OFF": totals["pts_for"] / totals["poss"],
            "DEF": totals["pts_against"] / totals["poss"],
            "poss": totals["poss"],
        }
    )
    ratings["NET"] = ratings["OFF"] - ratings["DEF"]
    value_cols = ["OFF", "DEF", "poss", "NET"]
    ratings[value_cols] = ratings[value_cols].round(2)
    return ratings


# Home-game ratings per team, labelled with the team name.
df_home = team_net_ratings(df1)
df_home["Team"] = df_home["tId"].map(teams_dict)
# Away-game ratings per team.
df_away = team_net_ratings(df2)

In [None]:
# Put "Team" first. Selecting the column by name (instead of "move whatever is
# last to the front") keeps this cell idempotent when it is re-run.
df_home = df_home[["Team"] + [col for col in df_home.columns if col != "Team"]]
# One row per team with home (_h) and away (_a) ratings side by side.
df_ha = pd.merge(df_home, df_away, on="tId", suffixes=("_h", "_a"))

In [None]:
def weighted_net_rating(ratings):
    """Return the possession-weighted mean of OFF minus that of DEF.

    `ratings` must contain the columns ``OFF``, ``DEF`` and ``poss``.
    """
    total_poss = ratings["poss"].sum()
    # Column arithmetic instead of a row-wise apply: same numbers, no Python loop.
    weighted_off = (ratings["OFF"] * ratings["poss"]).sum() / total_poss
    weighted_def = (ratings["DEF"] * ratings["poss"]).sum() / total_poss
    return weighted_off - weighted_def


home_adv = weighted_net_rating(df_home)
away_adv = weighted_net_rating(df_away)
print(home_adv)
print(away_adv)
print(home_adv - away_adv)

In [None]:
# Per-team home-minus-away net rating, two ways:
#   Home_m_Away  - each side scaled by its possessions, normalised by the mean
#                  of home and away possessions (hence the factor 2)
#   Home_m_Away2 - plain difference of the two net ratings
weighted_gap = (df_ha["NET_h"] * df_ha["poss_h"]) - (df_ha["NET_a"] * df_ha["poss_a"])
combined_poss = df_ha["poss_h"] + df_ha["poss_a"]
df_ha["Home_m_Away"] = (2 * weighted_gap / combined_poss).round(2)
df_ha["Home_m_Away2"] = df_ha["NET_h"] - df_ha["NET_a"]

# Attach team colours/logos; the join uses whatever columns the two frames share
# once "nameTeam" has been renamed to "Team".
df_teams = pd.read_csv("../data/NBA_teams_colors_logos.csv").rename(
    columns={"nameTeam": "Team"}
)
results_plot = pd.merge(df_ha, df_teams)

In [None]:
# Teams ranked from largest to smallest home-vs-away differential.
df_ha.sort_values("Home_m_Away", ascending=False)

In [None]:
# (Re)load the rpy2 IPython extension so the %%R cell magic below is available.
%reload_ext rpy2.ipython

In [None]:
%%R -i results_plot
# Scatter plot of home vs. away net rating with one logo per team, written to
# ../figs/team_ratings/TRatings_Home_Away.png.
# `results_plot` is pulled in from the Python namespace by the -i flag above.
results <- results_plot
library(tidyverse)
library(ggimage)
library(ggrepel)

# Minimal theme with a ghostwhite background and no minor grid lines.
theme_owen <- function() {
  theme_minimal(base_size = 16, base_family = "Consolas") %+replace%
    theme(
      panel.grid.minor = element_blank(),
      plot.background = element_rect(fill = "ghostwhite", color = "ghostwhite")
    )
}

# League-wide home-minus-away net rating, weighted by possessions. Computed from
# the data so the reference line and the subtitle stay in sync with the date in
# the title instead of relying on a hard-coded value.
home_edge <- with(
  results,
  sum(NET_h * poss_h) / sum(poss_h) - sum(NET_a * poss_a) / sum(poss_a)
)

p <- ggplot(results, aes(x = NET_h, y = NET_a)) +
  geom_hline(yintercept = 0, color = "black") +
  geom_vline(xintercept = 0, color = "black") +
  # Teams on this line have exactly the league-average home/away gap:
  # NET_a = NET_h - home_edge.
  # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` for line thickness; `size` is
  # kept so the cell also behaves the same on older ggplot2 installs.
  geom_abline(intercept = -home_edge, slope = 1, color = "blue", size = 2) +
  geom_image(
    aes(
      x = NET_h, y = NET_a,
      image = urlThumbnailTeam
    ),
    size = 0.1
  ) +
  theme_owen() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(face = "bold", size = 24, hjust = 0.5),
    plot.margin = margin(10, 10, 15, 10),
    plot.subtitle = element_text(size = 18),
    plot.caption = element_text(size = 14)
  ) +
  theme(
    axis.text.x = element_text(size = 14, face = "bold", color = "black"),
    axis.text.y = element_text(size = 14, face = "bold", color = "black"),
    axis.title.x = element_text(size = 18, face = "bold", colour = "black"),
    axis.title.y = element_text(size = 18, face = "bold", colour = "black")
  ) +
  labs(
    title = paste0("Net Ratings Home vs Away as of ", format(Sys.Date(), format = "%B %d, %Y")),
    x = "Home Net Rating", y = "Away Net Rating",
    subtitle = paste0(
      "The Farther the team is from the blue line, more extreme home vs away differential \n",
      "Average team is ", sprintf("%.1f", home_edge), " points better at home vs away"
    ),
    caption = "@sradjoker"
  )
# Full argument names instead of relying on partial matching of `w`/`h`.
ggsave(
  "../figs/team_ratings/TRatings_Home_Away.png", p,
  width = 10 * 1.5, height = 8 * 1.5, dpi = 300
)