# Leverage using PBP stats method

In [None]:
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
from nbafuns import *
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.linear_model import RidgeCV, Ridge
from joblib import Parallel, delayed, parallel_backend

# Relative directory paths used by this notebook (all resolved against the
# current working directory).
data_DIR = "../data/rapm/"
export_DIR = "./fdata/"
misc_DIR = "../data/misc/"  # the odds parquet file is read from here
model_path = "../data/models/"  # prefix for the torch.save / torch.load calls
pbp_DIR = "../data/pbpdata/"

fig_DIR = "../figs/rapm/"

In [None]:
# Seasons passed to get_box; np.arange excludes the stop value (2017..2023).
seasons = np.arange(2017, 2024)
box = get_box("T", "Base", False, seasons)
# Keep only rows whose matchup contains " vs. " (the rows treated as the home
# team's row by the renames below). The filtered frame is copied so that the
# column assignments that follow write to an independent DataFrame instead of
# a slice of the original, which avoids pandas' SettingWithCopyWarning.
box = box.loc[box["matchup"].str.contains(" vs. ")].copy()
# The text after " vs. " is the opponent; it becomes the visiting team.
box["matchup"] = box["matchup"].str.split(" vs. ", expand=True)[1]
box = box.rename(
    columns={
        "team_abbreviation": "home_team_abbrev",
        "matchup": "visit_team_abbrev",
        "team_id": "tidh",
    }
)
# Binary outcome for the row's team: 1 when "wl" is "W", otherwise 0.
box["win"] = np.where(box["wl"] == "W", 1, 0)
box = box[["game_id", "tidh", "home_team_abbrev", "visit_team_abbrev", "game_date", "win"]]

In [None]:
# Load the odds table. The file name is a plain literal (the previous f-string
# had no placeholders, so the prefix was dropped).
odds = pd.read_parquet(misc_DIR + "odds_2024.parquet")
odds["game_date"] = pd.to_datetime(odds["game_date"])
# Home margin: home score minus visitor score.
odds["net"] = odds["home_team_score"] - odds["visit_team_score"]
# Home-win flag: 1 when the home margin is positive, otherwise 0.
odds["win"] = np.where(odds["net"] > 0, 1, 0)

In [None]:
# One row per distinct line value: number of home wins, number of games, and
# the resulting win share rounded to three decimals.
oddl = (
    odds.groupby("line")
    .agg(wins=("win", "sum"), tot=("win", "count"))
    .reset_index()
)
oddl["win_pct"] = (oddl["wins"] / oddl["tot"]).round(3)
# Downstream cells refer to the line as "spread".
oddl = oddl.rename(columns={"line": "spread"})

In [None]:
# Overrides applied on top of the xkcd base theme: font family, a bold 20pt
# title and a small left-aligned caption.
_idv_overrides = theme(
    text=element_text(family=["Comic Sans MS"]),
    plot_title=element_text(face="bold", size=20),
    plot_caption=element_text(size=10, ha='left'),
)
theme_idv = theme_xkcd(base_size=16)
theme_idv += _idv_overrides


In [None]:
# Linear least-squares fit of win percentage against spread.
slope, intercept, r, p, sterr = scipy.stats.linregress(
    x=oddl["spread"],
    y=oddl["win_pct"],
)
r2 = r ** 2
# Show the unrounded fit statistics, one per line.
print(r2, slope, intercept, sep="\n")
# Rounded copies are what the plot annotation displays.
slope, intercept = round(slope, 4), round(intercept, 4)

In [None]:
# Text for the two annotations, built from the fitted values.
_r2_label = f'r^2={r2:0.3f}'
_fit_label = f'Win %=100 ({slope}*Spread+{intercept})'
# Shared styling for the dotted reference lines at spread 0 and 50% win rate.
_ref_line = dict(color="blue", linetype="dotted", size=1)

# Scatter of win percentage by spread with a linear smoother on top.
p = (
    ggplot(oddl)
    + aes(x="spread", y="win_pct")
    + geom_point()
    + geom_smooth(method="lm")
    + geom_vline(xintercept=0, **_ref_line)
    + geom_hline(yintercept=0.5, **_ref_line)
    + scale_y_continuous(labels=percent_format())
    + annotate('text', x=13, y=0.98, label=_r2_label, size=14)
    + annotate('text', x=9.8, y=0.86, label=_fit_label, size=10)
    + theme_idv
    + labs(
        title="Spread vs Win % (2017-2024)",
        caption="bsky:@sradjoker.cc | x:@SravanNBA | source: rotowire",
        x="Spread",
        y="Win %",
    )
)
p

In [None]:
# Pre-game win probability as a linear function of the betting line, using
# hard-coded coefficients (slope -0.0266, intercept 0.491).
# NOTE(review): the linear form is not clipped, so large |line| values yield
# results outside [0, 1] — confirm that is acceptable for downstream use.
odds["win_prob_pre"] = -0.0266*odds["line"]+0.491
odds["win_prob_pre"] = odds["win_prob_pre"].round(3)
winp1 = odds[["game_date","home_team_abbrev","visit_team_abbrev","win_prob_pre"]]
# Join on explicit keys: relying on "all shared columns" would silently change
# the join if either frame later gained another common column (e.g. "win").
winp = pd.merge(
    box,
    winp1,
    on=["game_date", "home_team_abbrev", "visit_team_abbrev"],
)
# Keep the game id (renamed by name rather than by position), the pre-game
# probability and the outcome.
winp = winp[["game_id","win_prob_pre","win"]].rename(columns={"game_id": "gid"})

In [None]:
# Define the PyTorch model
class PyTorchModel(nn.Module):
    """Feed-forward network: two ReLU hidden layers and a sigmoid output.

    Args:
        in_features: width of each input row.
        h1: number of units in the first hidden layer.
        h2: number of units in the second hidden layer.
        out_features: width of the output.
    """

    def __init__(self, in_features=3, h1=12, h2=12, out_features=1):
        super().__init__()  # initialise nn.Module before registering layers
        # The attribute names are the state_dict key prefixes, so they must
        # stay stable for previously saved weights to load.
        self.layer1 = nn.Linear(in_features, h1)
        self.layer2 = nn.Linear(h1, h2)
        self.output = nn.Linear(h2, out_features)

    def forward(self, x):
        """Apply both hidden layers with ReLU, then the sigmoid output layer."""
        for hidden in (self.layer1, self.layer2):
            x = Func.relu(hidden(x))
        return Func.sigmoid(self.output(x))

In [None]:
# random seed
# NOTE(review): not referenced by the cells visible here (train_test_split
# uses random_state=11 and torch.manual_seed uses 23) — confirm whether this
# value is still needed or should drive those calls.
rr = 99

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix and binary target pulled from dfw as NumPy arrays.
# NOTE(review): dfw is not defined in the cells visible here — it must be
# created before this cell runs.
_feature_cols = ['margin', 'win_prob_pre', 'secs']
X = dfw[_feature_cols].values
y = dfw['win'].values

# 80/20 split with a fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=11
)

# Convert to float tensors; targets get a trailing dimension so they are
# column-shaped like the model output.
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train).unsqueeze(1)
y_test = torch.FloatTensor(y_test).unsqueeze(1)

# Full (unsplit) data set as tensors.
inputs = torch.FloatTensor(X)
# NOTE(review): this name shadows the builtin eval(); kept unchanged because
# other cells may refer to it.
eval = torch.FloatTensor(y).unsqueeze(1)


In [None]:
# Seed torch's RNG before building the model so weight initialisation repeats.
torch.manual_seed(23)
# Initialize the model
model = PyTorchModel()
# Binary cross-entropy on the model's sigmoid output.
criterion = nn.BCELoss()
# Choose Adam Optimizer, lr = learning rate
# optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Full-batch training loop: one optimizer step per epoch on all of X_train.
num_epochs = 200
losses = []  # per-epoch training loss as plain Python floats
for epoch in range(num_epochs):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    # .item() extracts the scalar, so neither the history nor the progress
    # line carries a tensor (and its grad_fn) around.
    loss_value = loss.item()
    losses.append(loss_value)
    if epoch % 10 == 0:
        # Replace the previous progress line instead of stacking them; only
        # needed when something is about to be printed.
        clear_output(wait=True)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss_value:.4f}')
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Training-loss curve, one point per epoch, with the y-axis fixed to 0.6-0.8.
_epochs = range(num_epochs)
plt.plot(_epochs, losses)
plt.ylim(0.6, 0.8)
plt.show()

In [None]:
# Loss on the held-out split; no_grad skips building the autograd graph.
with torch.no_grad():
    # Call the module itself rather than .forward() so registered hooks run.
    y_eval = model(X_test)
    loss = criterion(y_eval, y_test)
print(loss)

In [None]:
# Print each model parameter's name and tensor size. The state_dict is built
# once and iterated with .items() instead of being rebuilt for every lookup.
print("Model's state_dict:")
for param_tensor, _param_value in model.state_dict().items():
    print(param_tensor, "\t", _param_value.size())

# Print each top-level entry of the optimizer's state_dict.
print("Optimizer's state_dict:")
for var_name, _var_value in optimizer.state_dict().items():
    print(var_name, "\t", _var_value)

In [None]:
# Save only the learned parameters (the state_dict); they are read back by
# building a PyTorchModel and calling load_state_dict on it.
torch.save(model.state_dict(), model_path +"win_prob_dict_2")

In [None]:
# Save the whole module object rather than just its parameters.
# NOTE(review): the matching torch.load call uses weights_only=False and needs
# the PyTorchModel class to be defined at load time; the state_dict file saved
# separately holds the same weights without that requirement.
torch.save(model, model_path +"win_prob_2.pt")

In [None]:
# Restore a model from the saved parameters: read the state_dict, build a
# fresh network with default sizes, copy the weights in, switch to eval mode.
_saved_weights = torch.load(model_path +"win_prob_dict_2", weights_only=True)
model1 = PyTorchModel()
model1.load_state_dict(_saved_weights)
model1.eval()

In [None]:
# Model class must be defined somewhere
# NOTE(security): weights_only=False makes torch.load unpickle arbitrary
# objects, which can execute code embedded in the file — only load files you
# created or trust. Loading the state_dict with weights_only=True avoids this.
# NOTE(review): this rebinds model1, replacing any instance previously built
# from the state_dict.
model1 = torch.load( model_path +"win_prob_2.pt", weights_only=False)
model1.eval()

In [None]:
# inputs = torch.tensor(dfw[['margin', 'win_prob', 'secs']].values, dtype=torch.float32)
# targets = torch.tensor(dfw['win'].values, dtype=torch.float32).unsqueeze(1)
# dataset = TensorDataset(inputs, targets)
# dataloader = DataLoader(dataset, batch_size=512, shuffle=True)

# for epoch in range(num_epochs):
#     for batch_inputs, batch_targets in dataloader:
#         optimizer.zero_grad()
#         outputs = model(batch_inputs)
#         loss = criterion(outputs, batch_targets)
#         loss.backward()
#         optimizer.step()
#     print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# # Print the model summary
# print(model)