# In [None]:
# %% markdown
"""
# NBA Timeout Analysis: Do Timeouts Stop Opponent Momentum?

## An Analysis of NBA Timeout Effectiveness in Disrupting Scoring Runs

In basketball, coaches often call timeouts when the opposing team is on a scoring run, commonly referred to as "stopping the bleeding." This analysis explores whether this coaching strategy is effective by examining NBA timeout data from five seasons (1999-00, 2004-05, 2010-11, 2016-17, 2022-23).

**Research Question:** Do timeouts effectively disrupt opponent momentum during scoring runs?

**Hypothesis:** When the opposing team goes on a scoring run of 6-0 or better and a timeout is called, that team's offensive efficiency is lower after the timeout than it was before it.

**Methodology:** We analyze play-by-play data, identify scoring runs of 6+ points, and compare offensive efficiency before and after timeouts.
"""

# %% code
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from matplotlib.ticker import FuncFormatter
from matplotlib.lines import Line2D

# Suppress warnings for clean output.
# NOTE(review): this silences every warning category, including deprecation
# and runtime warnings raised by the libraries used below; narrow the filter
# when debugging.
warnings.filterwarnings('ignore')

# Paths and visualization settings
FIGURES_PATH = "DSA-210-PROJECT/dsa project/outputs/figures"
os.makedirs(FIGURES_PATH, exist_ok=True)  # created up front so savefig() calls cannot fail on a missing directory

# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and deprecated the old names, which newer releases no longer accept.
# Apply whichever spelling this installation actually ships instead of
# crashing with OSError; if neither exists, sns.set_style below still
# provides the white-grid look.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_style("whitegrid")

# pandas display options: show every column, cap printed rows at 20, use a
# wide console, and print floats with two decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format', '{:.2f}'.format)

# %% markdown
"""
### Loading and Preparing Data
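First, we attempt to load the real dataset. If the file is missing, we generate synthetic demonstration data from a fixed random seed; any results computed from that fallback are illustrative only.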
"""

# %% code
# Load the analysis results from disk; when the CSV is absent, fall back to a
# seeded synthetic dataset so the remaining cells can still run.
data_file = os.path.join("outputs", "timeout_analysis_results.csv")
try:
    df = pd.read_csv(data_file)
except FileNotFoundError:
    print(f"File not found: {data_file}. Creating demo data...")
    np.random.seed(42)  # fixed seed -> the demo dataset is reproducible
    n = 2470
    seasons = ['1999-00','2004-05','2010-11','2016-17','2022-23']
    teams = ['ORL','NOP','DET','TOR','BOS','MEM','OKC','MIN','POR','PHX','CLE','ATL','SAS','LAC','UTA']
    quarters = ['Q1','Q2','Q3','Q4','OT1']
    run_sizes = ['6-7','8-9','10-11','12-14','15-19','20+']

    # The columns are drawn in a fixed order: every call consumes the shared
    # global RNG stream, so reordering them would change the generated values.
    demo_columns = {
        'season': np.random.choice(seasons, size=n, p=[0.25,0.17,0.18,0.20,0.20]),
        'opponent_abbr': np.random.choice(teams, size=n),
        'quarter': np.random.choice(quarters, size=n, p=[0.21,0.27,0.25,0.26,0.01]),
        'run_size_bin': np.random.choice(run_sizes, size=n, p=[0.96,0.035,0.0048,0.0001,0.0001,0.0]),
        'pre_timeout_oe': np.random.normal(18, 5, n),
    }
    # Post-timeout efficiency is the pre-timeout value plus Gaussian noise
    # with a negative mean.
    demo_columns['post_timeout_oe'] = (
        demo_columns['pre_timeout_oe'] + np.random.normal(-3.655, 15.798, n)
    )
    demo_columns['efficiency_change'] = (
        demo_columns['post_timeout_oe'] - demo_columns['pre_timeout_oe']
    )
    # A timeout counts as effective when efficiency dropped afterwards.
    demo_columns['effective'] = demo_columns['efficiency_change'] < 0
    demo_columns['run_terminated'] = np.random.choice([True, False], size=n, p=[0.574,0.426])
    demo_columns['pre_timeout_fg_pct'] = np.random.uniform(0.4,0.7, n)
    demo_columns['post_timeout_fg_pct'] = np.random.uniform(0.3,0.55,n)
    demo_columns['pre_timeout_ts'] = np.random.uniform(0.5,0.75,n)
    demo_columns['post_timeout_ts'] = np.random.uniform(0.4,0.65,n)

    df = pd.DataFrame(demo_columns)
    print("Demo data created.")
else:
    # Rows labelled with the 1996-97 season are excluded from the analysis.
    if 'season' in df.columns:
        df = df[df['season'] != '1996-97']
    print(f"Loaded {len(df)} records from {data_file}.")

# %% markdown
"""
### 1. Overall Timeout Effectiveness
A timeout is “effective” if the opponent’s offensive efficiency (points per possession) decreases after the timeout.
"""

# %% code
# Headline numbers: how many timeouts were followed by a drop in opponent
# efficiency, the mean change, and a one-sample t-test of that change
# against zero (SciPy's default two-sided alternative).
total = df.shape[0]
effective_count = df['effective'].sum()
rate = effective_count / total
avg_change = df['efficiency_change'].mean()
t_stat, p_val = stats.ttest_1samp(df['efficiency_change'], 0)

# One print call, one report line per argument.
print(
    f"Total timeouts: {total}",
    f"Effective: {effective_count} ({rate:.1%})",
    f"Avg change in efficiency: {avg_change:.3f} ppp",
    f"t={t_stat:.3f}, p={p_val:.2e} → {'significant' if p_val<0.05 else 'not significant'}",
    sep="\n",
)

# %% markdown
"""
### 2. Distribution of Efficiency Changes
Histogram of post-minus-pre timeout efficiency changes.
"""

# %% code
# Histogram (with KDE overlay) of the post-minus-pre efficiency change,
# drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12,6))
sns.histplot(df['efficiency_change'], kde=True, bins=25, ax=ax)

# Vertical reference lines for the mean and the median of the distribution.
median = df['efficiency_change'].median()
ax.axvline(avg_change, color='red', linestyle='--', label=f"Mean {avg_change:.3f}")
ax.axvline(median, color='green', linestyle=':', label=f"Median {median:.3f}")

# Summary box pinned to the top-left corner in axes-relative coordinates.
ax.text(
    0.05,
    0.95,
    f"Mean: {avg_change:.3f}\nMedian: {median:.3f}\nStd: {df['efficiency_change'].std():.3f}",
    transform=ax.transAxes,
    va='top',
    bbox=dict(facecolor='white', alpha=0.8),
)

ax.set_xlabel("Efficiency Change (ppp)")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Efficiency Change After Timeouts")
ax.legend()
fig.tight_layout()

# Write the PNG before show() so the saved file contains the rendered figure.
fig.savefig(os.path.join(FIGURES_PATH, "efficiency_change_histogram.png"), dpi=300)
plt.show()

# %% markdown
"""
### 3. Pre- vs Post-Timeout Efficiency Scatter
"""

# %% code
# Pearson correlation between pre- and post-timeout efficiency, plus a
# paired t-test on the two columns.
corr = np.corrcoef(df['pre_timeout_oe'], df['post_timeout_oe'])[0,1]
t2, p2 = stats.ttest_rel(df['pre_timeout_oe'], df['post_timeout_oe'])

fig, ax = plt.subplots(figsize=(8,8))

# Green points: efficiency dropped after the timeout; red points: it did not.
colors = df['effective'].map({True:'green', False:'red'})
ax.scatter(df['pre_timeout_oe'], df['post_timeout_oe'], c=colors, alpha=0.5, s=30)

# Identity line spanning the combined data range (padded by 0.1 on each side);
# points below it are timeouts after which efficiency decreased.
lower_bound = min(df['pre_timeout_oe'].min(), df['post_timeout_oe'].min())-0.1
upper_bound = max(df['pre_timeout_oe'].max(), df['post_timeout_oe'].max())+0.1
lims = [lower_bound, upper_bound]
ax.plot(lims, lims, 'k--', label="y = x")

# Statistics box pinned to the top-left corner in axes-relative coordinates.
ax.text(
    0.05,
    0.95,
    f"N={total}\nCorr={corr:.3f}\nt={t2:.3f}, p={p2:.2e}",
    transform=ax.transAxes,
    va='top',
    bbox=dict(facecolor='white', alpha=0.8),
)

ax.set_xlabel("Pre-Timeout OE (ppp)")
ax.set_ylabel("Post-Timeout OE (ppp)")
ax.set_title("Pre vs Post Timeout Offensive Efficiency")

# The legend is built by hand because a single scatter call carries both
# colours and therefore has no per-class legend entries of its own.
legend_entries = [
    Line2D([0],[0], marker='o', color='w', markerfacecolor='green', label="Effective"),
    Line2D([0],[0], marker='o', color='w', markerfacecolor='red',   label="Ineffective"),
    Line2D([0],[0], color='k', linestyle='--', label="y = x"),
]
ax.legend(handles=legend_entries)

fig.tight_layout()
fig.savefig(os.path.join(FIGURES_PATH, "pre_vs_post_scatter.png"), dpi=300)
plt.show()
