In [None]:
from nba_api.stats.endpoints import leaguegamelog, playbyplayv3
import numpy as np
from tqdm import tqdm
import pandas as pd

In [None]:
pbp_data = pd.read_csv("ALL_PBP_DATA.csv")

In [None]:
def identify_possessions(df):
    #mark possessions ends (indicated by made shots, turnovers, and the end of free throw sequences)
    df['possession_end'] = df['actionType'].isin(['Made Shot', 'Turnover']) | (
        df['description'].str.contains("Free Throw") &
        (~df['description'].shift(-1).str.contains("Free Throw").fillna(False))
    )
    df['possession_id'] = df['possession_end'].cumsum()
    return df

In [None]:
pbp = determine_foul_trouble(pbp_data)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df['foul_count'] = df.groupby(['gameId', 'personId', 'period'])['description'].apply(lambda x: x.str.contains('Foul').cumsum())


In [None]:
print(np.unique(pbp["foul_count"]))

[0 1 2 3 4 5 6]


In [None]:
foul_trouble = pbp[pbp["foul_trouble"] == True]

In [None]:
not_foul_trouble = pbp[pbp["foul_trouble"] == False]

In [None]:
num_rows = foul_trouble[(foul_trouble["actionType"] == "Made Shot") | (foul_trouble["actionType"] == "Missed Shot")].shape[0]
print(num_rows)


67


In [None]:
len(foul_trouble[foul_trouble["actionType"] == "Made Shot"]) / num_rows

0.47761194029850745

In [None]:
num_rows = not_foul_trouble[(not_foul_trouble["actionType"] == "Made Shot") | (not_foul_trouble["actionType"] == "Missed Shot")].shape[0]

In [None]:
len(not_foul_trouble[not_foul_trouble["actionType"] == "Made Shot"]) / num_rows

0.47483487332870444

In [None]:
def determine_foul_trouble(df):
    #Assuming a player is in foul trouble if they reach 3+ fouls by the half, 4+ by the 3rd, 5+ by the 4th
    foul_limits = {1: 3, 2: 3, 3: 4, 4: 5}
    df['foul_count'] = df.groupby(['gameId', 'personId', 'period'])['description'].apply(lambda x: x.str.contains('Foul').cumsum())
    df['foul_trouble'] = df.apply(lambda row: row['foul_count'] >= foul_limits.get(row['period'], 5), axis=1)
    return df
