# NBA 4 Factors Analysis

In [None]:
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LinearRegression
import os, sys

# "__file__" here is a string literal, not the __file__ variable (presumably
# because __file__ is undefined inside a notebook). abspath() therefore resolves
# it against the current working directory, and the two dirname() calls yield
# the PARENT of the working directory, which is appended to the import path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
# NOTE(review): this star import is presumably what brings pd, np, scipy, plt,
# sns and get_teams into scope for the cells below — confirm against nbafuns.
from nbafuns import *

# from sklearn.pipeline import make_pipeline
# model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

# Directory (relative to the working directory) that holds the per-season
# team box-score CSV files; the trailing slash matters to code that builds
# file paths by plain string concatenation.
data_DIR = "../fdata/boxscore_team/"

# Team lookup structures for the NBA, as returned by the nbafuns helper.
teams_dict, teams_list = get_teams(league="NBA")

In [None]:
def get_ratings(season=2023, data_dir=None):
    """Load one season of per-game team box scores with short Four Factors names.

    Reads ``NBA_BoxScores_4Factor_{season}.csv`` and
    ``NBA_BoxScores_Adv_{season}.csv`` (the first CSV column is used as the
    index), inner-joins them on every column name the two files share, keeps
    the identifier, Four Factors and rating columns, and renames the long
    stat names to short ones (suffix ``1`` = the team's own factors,
    suffix ``2`` = the ``opp*`` factors).

    Args:
        season: Season label interpolated into the file names (int or str).
        data_dir: Directory containing the two CSV files. When omitted, the
            module-level ``data_DIR`` is used, as before.

    Returns:
        A new DataFrame with columns ``gameId``, ``teamName``, ``teamId``,
        ``eFG1``, ``TOV1``, ``OREB1``, ``FTA1``, ``eFG2``, ``TOV2``,
        ``OREB2``, ``FTA2``, ``ORtg``, ``DRtg``, ``NRtg`` (in that order).

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If the merged table lacks one of the expected columns.
    """
    if data_dir is None:
        data_dir = data_DIR

    # os.path.join tolerates a directory given with or without a trailing
    # separator, unlike plain string concatenation.
    four_factor = pd.read_csv(
        os.path.join(data_dir, f"NBA_BoxScores_4Factor_{season}.csv"),
        index_col=0,
    )
    advanced = pd.read_csv(
        os.path.join(data_dir, f"NBA_BoxScores_Adv_{season}.csv"),
        index_col=0,
    )

    # Join on every column the two files have in common so that no shared
    # column is duplicated with _x/_y suffixes in the result.
    shared = advanced.columns.intersection(four_factor.columns).to_list()
    merged = pd.merge(advanced, four_factor, on=shared)

    id_columns = ["gameId", "teamName", "teamId"]
    # Insertion order of this mapping defines the output column order.
    short_names = {
        "effectiveFieldGoalPercentage": "eFG1",
        "teamTurnoverPercentage": "TOV1",
        "offensiveReboundPercentage": "OREB1",
        "freeThrowAttemptRate": "FTA1",
        "oppEffectiveFieldGoalPercentage": "eFG2",
        "oppTeamTurnoverPercentage": "TOV2",
        "oppOffensiveReboundPercentage": "OREB2",
        "oppFreeThrowAttemptRate": "FTA2",
        "offensiveRating": "ORtg",
        "defensiveRating": "DRtg",
        "netRating": "NRtg",
    }

    # rename() already returns a new frame, so no extra copy is needed.
    return merged[id_columns + list(short_names)].rename(columns=short_names)

In [None]:
# Season labels "2018" through "2022" as strings (the stop value 2023 is
# exclusive); these seasons form the data the regression is fitted on.
seasons = np.arange(2018, 2023, 1).astype(str)
dfa = []
for season in seasons:
    # One frame per season, each loaded from that season's pair of CSV files.
    df = get_ratings(season=season)
    dfa.append(df)
# Stack the per-season frames row-wise; the index is not reset, so index labels
# may repeat across seasons.
data_t = pd.concat(dfa)
# Disabled experiment: keep every second row only.
# data = data.iloc[::2]
# The 2023 season is loaded separately and used as the prediction set.
data_p = get_ratings(season="2023")

In [None]:
# The eight regressors, in coefficient order: the team's own four factors
# (suffix 1) followed by the opponent's four factors (suffix 2).
factor_cols = ["eFG1", "TOV1", "OREB1", "FTA1", "eFG2", "TOV2", "OREB2", "FTA2"]

# Fitting data: feature matrix and net-rating target.
X_t, y_t = data_t[factor_cols].to_numpy(), data_t["NRtg"].to_numpy()

# Prediction data: same columns, taken from the separately loaded season.
X_p, y_p = data_p[factor_cols].to_numpy(), data_p["NRtg"].to_numpy()

In [None]:
# Thirteen log-spaced values, one per decade from 1e-6 to 1e6 — a candidate
# grid of regularization strengths (presumably for the RidgeCV alternative).
alphas = np.logspace(start=-6, stop=6, num=13)

In [None]:
# Least-squares fit without an intercept term: net rating regressed on the
# eight factor columns, then applied to the prediction set.
reg = LinearRegression(fit_intercept=False)
# Regularized alternative (disabled):
# reg = RidgeCV(alphas=alphas, cv=10, fit_intercept=False)
model = reg.fit(X_t, y_t)  # fit() returns the estimator itself, so model is reg
y_pf = model.predict(X_p)
coef = model.coef_
print(coef)
# Only meaningful with the RidgeCV alternative, which exposes the chosen alpha:
# print(model.alpha_)

In [None]:
# Signed percentage share of each of the first four coefficients (the eFG1,
# TOV1, OREB1, FTA1 columns) relative to the sum of their absolute values.
# Left as a bare expression so the notebook displays the result.
coef[:4] / np.abs(coef[:4]).sum() * 100

In [None]:
# Compare actual (x) and predicted (y) net ratings on the prediction set:
# a simple linear regression between the two gives r, reported as r^2.
slope, intercept, r, p, sterr = scipy.stats.linregress(x=y_p, y=y_pf)
r2 = r**2
fig, ax = plt.subplots(1, 1)
# regplot draws onto `ax` and returns that same Axes. Its return value is
# deliberately not assigned to `fig`, so `fig` keeps referring to the Figure
# (the original rebinding made `fig` an Axes, which has no savefig()).
sns.regplot(
    x=y_p, y=y_pf, color="black", scatter_kws={"color": "tab:blue"}, ax=ax
)
# Annotate in axes-fraction coordinates so the label stays in the top-left
# corner regardless of the data range.
ax.text(0.05, 0.9, r"$r^2=$" + f"{round(r2,4)}", transform=ax.transAxes)
plt.show()

# Kevin's Luck Adjustments
LA_ORtg = 30.65711592+212.7311275*(`3PR_Team`*`3P%_Team`)+106.1847216*`2P%_Team`+64.39756164*`2PR_Team`+151.8564492*(`FT%_Team`*FTAR_Team)+.815155804*((1-`FG%_Team`)*`ORB%_Team`)-1.176866417*`TOV%_Team`-1.03961469*`TOV%_Lg`+126.1557339*(`3P%_Lg`*`3PR_Lg`)+.620188564*`ORB%_Lg`-115.5861051*`2PR_Lg`+128.3086605*(`2P%_Lg`*`2PR_Lg`)-93.57506625*`3PR_Lg`,
LA_DRtg = 30.65711592+212.7311275*(`3PR_Lg`*`3P%_Lg`)+106.1847216*`2P%_Lg`+64.39756164*`2PR_Lg`+151.8564492*(`FT%_Lg`*FTAR_Lg)+.815155804*((1-`FG%_Lg`)*`ORB%_Lg`)-1.176866417*`TOV%_Lg`-1.03961469*`TOV%_Opp`+126.1557339*(`3P%_Opp`*`3PR_Opp`)+.620188564*`ORB%_Opp`-115.5861051*`2PR_Opp`+128.3086605*(`2P%_Opp`*`2PR_Opp`)-93.57506625*`3PR_Opp`