In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from nba_api.stats.endpoints import leaguegamefinder

In [None]:
# Read the EPM table from a CSV given by relative path.
epm_csv_path = 'epm_data.csv'
epm = pd.read_csv(epm_csv_path)

In [None]:
# Rescale the "epm" column by 1/100.
# NOTE(review): presumably EPM is quoted per 100 possessions, making this a
# per-possession value — confirm against the data source.
epm["epm_per_poss"] = epm["epm"].div(100)

In [None]:
"""
League average pace is 99.1 possessions per 48 minutes, which is sufficiently
close to a possession per minute.
"""

'\nLeague average pace is 99.1 possessions per 48 minutes, which is sufficiently\nclose to a possession per minute.\n'

In [None]:
# A pace of 99.1 possessions per 48 minutes is about 2.06 possessions per
# minute (not ~1), so the per-possession value has to be multiplied by that
# rate to obtain a per-minute value.
LEAGUE_PACE_PER_48 = 99.1  # league-average possessions per 48 minutes
POSSESSIONS_PER_MINUTE = LEAGUE_PACE_PER_48 / 48
epm["epm_per_min"] = epm["epm_per_poss"] * POSSESSIONS_PER_MINUTE

In [None]:
# Read the tracking-shots export (one row per shot) with the C parser.
shots_csv_path = 'pbpstats-tracking-shots.csv'
all_shots = pd.read_csv(shots_csv_path, engine='c')

In [None]:
# Keep only the rows whose "Made" flag equals True.
is_made = all_shots["Made"] == True
made_shots = all_shots[is_made]

In [None]:
# Drop every game whose id, rendered as text, begins with "4".
# NOTE(review): presumably those ids are playoff games — confirm.
game_id_text = made_shots["GameId"].astype(str)
made_shots_not_4 = made_shots[~game_id_text.str.startswith('4')]

In [None]:
# Read the saved game log from CSV.
# NOTE(review): `game_data` is reassigned from the LeagueGameFinder result two
# cells further down, so on a top-to-bottom run this CSV copy is replaced.
game_log_path = 'game_log.csv'
game_data = pd.read_csv(game_log_path, engine='c')

In [None]:
# Create the nba_api LeagueGameFinder endpoint object with default arguments;
# its result frames are read with get_data_frames() in the following cell.
# NOTE(review): presumably the stats API request is made on construction — confirm.
data = leaguegamefinder.LeagueGameFinder()

In [None]:
# Take the first result frame from the endpoint object; it is used below as the
# game log for win/loss lookups.
result_frames = data.get_data_frames()
game_data = result_frames[0]

In [None]:
# Spot-check a single game id in the game log.
# `data` is the LeagueGameFinder endpoint object, not a DataFrame, so the
# lookup has to go through `game_data` (the frame extracted from it).
game_data[game_data["GAME_ID"] == "0022000628"]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
21975,22020,1610612756,PHX,Phoenix Suns,22000628,2021-03-18,PHX vs. MIN,L,240,119,...,0.737,9,38,47,29,6,6,22,30,-4.0
21981,22020,1610612750,MIN,Minnesota Timberwolves,22000628,2021-03-18,MIN @ PHX,W,240,123,...,0.686,11,27,38,22,14,4,12,16,4.0


In [None]:
def did_team_win_game(team_id, game_id):
  """Return 1 if `team_id` won `game_id` according to `game_data`, else 0.

  The game is looked up in the module-level `game_data` frame through its
  "GAME_ID" and "TEAM_ID" columns, and the "WL" value of the single matching
  row decides the result ("W" -> 1, anything else -> 0).

  Raises:
    Exception: if the lookup does not match exactly one row.
  """
  is_match = (game_data["GAME_ID"] == game_id) & (game_data["TEAM_ID"] == team_id)
  matches = game_data[is_match]
  if len(matches) != 1:
    raise Exception("Issue finding matching game")

  return 1 if matches.iloc[0]["WL"] == "W" else 0

In [None]:
# Build one record per made shot in periods 1-4: the row's "Margin" value, a
# one-hot encoding of the quarter, and whether the row's team won the game.
REGULATION_PERIODS = (1, 2, 3, 4)

margin = []
quarter_one = []
quarter_two = []
quarter_three = []
quarter_four = []
win = []
missing_vals = 0
all_missing_vals = []

for _, row in tqdm(made_shots_not_4.iterrows(), total=len(made_shots_not_4)):
  period = row["Period"]
  if period not in REGULATION_PERIODS:
    continue  # anything outside periods 1-4 (e.g. overtime) is skipped

  game_id = "00" + str(row["GameId"])

  # Resolve the outcome BEFORE appending anything, so a failed lookup cannot
  # leave the feature lists longer than `win`.
  try:
    won = did_team_win_game(row["TeamId"], game_id)
  except Exception:
    # did_team_win_game raises a plain Exception when the game is not found.
    missing_vals += 1
    all_missing_vals.append(game_id)
    continue

  margin.append(row["Margin"])
  quarter_one.append(int(period == 1))
  quarter_two.append(int(period == 2))
  quarter_three.append(int(period == 3))
  quarter_four.append(int(period == 4))
  win.append(won)

print(str(missing_vals) + " missing vals.")
# The percentage is taken over the rows that were actually iterated.
shots_considered = len(made_shots_not_4)
percent_missing = (100 * missing_vals) / shots_considered if shots_considered else 0.0
print(str(percent_missing) + " percent missing.")



7914it [01:20, 98.71it/s] 

0 missing vals.
0.0 percent missing.





In [None]:
# Assemble the per-shot lists into a single frame, one column per list.
shot_columns = {
    "Margin": margin,
    "quarter_one": quarter_one,
    "quarter_two": quarter_two,
    "quarter_three": quarter_three,
    "quarter_four": quarter_four,
    "Win": win,
}
X = pd.DataFrame(shot_columns)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Fourth-quarter shots only. The selections get their own names so that `X` is
# not overwritten and this cell can be re-run without a KeyError.
q4_shots = X[X["quarter_four"] == 1]
q4_margin_values = q4_shots[["Margin"]].values  # one feature column
q4_win_values = q4_shots["Win"].values          # 1-D target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(q4_margin_values)

# Create a logistic regression model
model = LogisticRegression()
model.fit(X_scaled, q4_win_values)

# Get the coefficients
intercept = model.intercept_[0]

# The model is fit on standardized margins, so its raw coefficient is the
# log-odds change per one STANDARD DEVIATION of margin. Dividing by the
# scaler's per-feature scale expresses it per single point of margin.
coef_per_std = model.coef_[0][0]
coef_point_diff = coef_per_std / scaler.scale_[0]

odds_ratio = np.exp(coef_point_diff)

print(f"Odds Ratio for Point Differential: {odds_ratio}")

Odds Ratio for Point Differential: 21.084091885423227


New idea: each data point should be a game (one row per team). The first feature should be the point differential in the first quarter, the second feature the point differential in the second quarter, and so on. Make sure each feature is the DIFFERENCE: the margin at the end of this quarter minus the margin at the end of the previous quarter.

In [None]:
# Show which columns the filtered made-shots frame provides.
made_shots_not_4.columns

Index(['EventNumber', 'Margin', 'OReboundedShotEventNum', 'X', 'ShotQuality',
       'OReboundedRebEventNum', 'Y', 'Blocked', 'LineupId', 'Value',
       'BlockPlayerId', 'OpponentLineupId', 'PlayerId', 'Assisted', 'And1',
       'GameId', 'Made', 'AssistPlayerId', 'Period', 'Time', 'Putback',
       'PossessionNumber', 'ShotType', 'Player', 'StartTime', 'Team', 'TeamId',
       'Opp', 'OppTeamId', 'VideoUrl', 'Distance', 'ClockTime',
       'ClockStartTime', 'AssistPlayer', 'BlockPlayer', 'PassFromPlayerId',
       'PassFromPlayer', 'CatchAndShoot', 'PassFromX', 'PassFromY',
       'SecondsRemainingOnShotClock', 'WideOpen'],
      dtype='object')

In [None]:
# Build one row per (game, team): the change in the team's margin over each
# quarter, plus whether the team won.
#
# NOTE(review): the margin for a quarter is taken from the "Margin" value on
# the team's last made shot of that period (highest EventNumber); confirm that
# this is close enough to the true end-of-quarter margin.
q1_margin = []
q2_margin = []
q3_margin = []
q4_margin = []
win = []
missing_vals = 0

for game_id in tqdm(np.unique(made_shots_not_4["GameId"])):
  game_df = made_shots_not_4[made_shots_not_4["GameId"] == game_id]
  game_id_string = "00" + str(game_id)

  for team_id in np.unique(game_df["TeamId"]):
    team_df = game_df[game_df["TeamId"] == team_id]

    # "Margin" on the team's last made shot of each of periods 1-4.
    last_margin = {}
    for period in (1, 2, 3, 4):
      period_df = team_df[team_df["Period"] == period]
      if period_df.empty:
        break
      latest_first = period_df.sort_values(by='EventNumber', ascending=False)
      last_margin[period] = latest_first["Margin"].values[0]

    if len(last_margin) != 4:
      # The team has no made shot in some period, so the split is undefined.
      missing_vals += 1
      continue

    # Look the result up BEFORE appending anything, so a failed lookup cannot
    # leave the margin lists longer than `win`.
    try:
      won = did_team_win_game(team_id, game_id_string)
    except Exception:
      # did_team_win_game raises a plain Exception when the game is not found.
      missing_vals += 1
      continue

    q1, q2, q3, q4 = (last_margin[period] for period in (1, 2, 3, 4))
    q1_margin.append(q1)
    q2_margin.append(q2 - q1)
    q3_margin.append(q3 - q2)
    q4_margin.append(q4 - q3)
    win.append(won)

df = pd.DataFrame({"q1_margin_change": q1_margin, "q2_margin_change": q2_margin, "q3_margin_change": q3_margin, "q4_margin_change": q4_margin, "win": win})
print("Missing vals: " + str(missing_vals))

100%|██████████| 499/499 [00:08<00:00, 61.93it/s]

Missing vals: 176





In [None]:
# Display the per-(game, team) quarter margin-change frame built in the previous cell.
df

Unnamed: 0,q1_margin_change,q2_margin_change,q3_margin_change,q4_margin_change,win
0,-6,8,-9,7,0
1,-8,-3,7,1,1
2,5,-13,8,-11,0
3,-4,12,8,-4,1
4,0,12,0,-18,0
...,...,...,...,...,...
500,3,-1,2,-3,1
501,1,-1,1,-5,1
502,-4,1,2,-7,0
503,-1,-6,6,3,1


In [None]:
# Features: the four per-quarter margin changes.
feature_columns = ["q1_margin_change", "q2_margin_change", "q3_margin_change", "q4_margin_change"]
X = df[feature_columns]
# Target as a 1-D Series: a one-column DataFrame makes scikit-learn emit a
# DataConversionWarning ("column-vector y was passed") when a model is fit.
y = df["win"]

In [None]:
# Sanity check: features and target must have the same number of rows.
print(len(X), len(y), sep="\n")

505
505


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
# Flatten the target to 1-D: fitting on a column vector triggers scikit-learn's
# DataConversionWarning. np.ravel works for both a Series and a one-column frame.
model.fit(X_train, np.ravel(y_train))

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')

# One coefficient (log-odds change per unit) for each feature column.
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_[0]})
print(coefficients)


Accuracy: 0.90
            Feature  Coefficient
0  q1_margin_change     0.306274
1  q2_margin_change     0.350086
2  q3_margin_change     0.334358
3  q4_margin_change     0.319150


  y = column_or_1d(y, warn=True)


In [None]:
# exp(coefficient) is the odds ratio for a one-unit increase in that feature.
# np.exp replaces the hand-typed, truncated constant 2.71828.
odds_ratios = [round(float(np.exp(coef)), 3) for coef in coefficients["Coefficient"].tolist()]
print("Odds ratios:")
print(odds_ratios)
# odds / (1 + odds) is the win probability reached after a +1 change when
# starting from even odds (50%). It is a probability LEVEL, not a change in
# probability; the variable name is kept because a later cell reads it.
probability_changes = [round((odds / (1 + odds)), 3) for odds in odds_ratios]
print("Win probability after a +1 change, starting from 50%:")
print(probability_changes)

Odds ratios:
[1.358, 1.419, 1.397, 1.376]
Probability changes:
[0.576, 0.587, 0.583, 0.579]


In [None]:
# `probability_changes` holds odds / (1 + odds), i.e. the win probability
# reached after a +1 change when starting from even odds. Report it as a move
# from 50% to that level (and the difference in percentage points) rather than
# as a change of that size.
for feature, prob_after in zip(X.columns, probability_changes):
    points_gained = (prob_after - 0.5) * 100
    print(f"{feature}: starting from a 50% win probability, a +1 change moves it to {prob_after * 100:.1f}% ({points_gained:+.1f} percentage points).")

q1_margin_change: +1 change leads to a 57.599999999999994% change in the probability of winning.
q2_margin_change: +1 change leads to a 58.699999999999996% change in the probability of winning.
q3_margin_change: +1 change leads to a 58.3% change in the probability of winning.
q4_margin_change: +1 change leads to a 57.9% change in the probability of winning.
