# NBA Timeout Effect Analysis

This notebook analyzes how timeouts affect the opponent's scoring momentum in NBA games, using summary statistics, a one-sample t-test on the change in efficiency, and visualizations.

In [None]:
# 1. Setup and Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os
import warnings

# Silence only library deprecation chatter. Ignoring every warning category would
# also hide RuntimeWarnings (divide-by-zero, degenerate t-tests, ...) that point
# at real problems in the analysis cells below.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
# Make sure outputs/ and outputs/figures/ exist; a no-op when they already do.
os.makedirs("outputs/figures", exist_ok=True)

In [None]:
# 2. Data Loading and Cleaning
# Text values that mean "false" for a flag column when read_csv leaves it as strings.
_FALSE_TOKENS = frozenset({'', '0', '0.0', 'f', 'false', 'n', 'no', 'nan', 'none', 'null'})


def _coerce_flag_column(series):
    """Turn a flag column of mixed representation into a genuine bool Series."""
    if pd.api.types.is_numeric_dtype(series):
        # NaN is truthy under astype(bool); a missing flag must not count as True.
        return series.fillna(0).astype(bool)

    def _as_flag(value):
        if pd.isna(value):
            return False
        if isinstance(value, str):
            # Plain astype(bool) would map the string "False" to True.
            return value.strip().lower() not in _FALSE_TOKENS
        return bool(value)

    return series.map(_as_flag).astype(bool)


def load_and_clean_data(file_path):
    """Load the timeout results CSV and apply the notebook's cleaning steps.

    Steps, in order: coerce the flag columns to bool, fill missing numeric
    values with 0, clip the efficiency columns to mean +/- 3 standard
    deviations, and add the ``run_size_bin`` / ``score_situation`` categories.
    Every step is skipped for columns that are not present.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` (after printing a message) when ``file_path`` does not exist,
        the frame as read when it has no rows, otherwise the cleaned frame.
    """
    if not os.path.exists(file_path):
        print(f"Error: Data file {file_path} not found")
        return None
    df = pd.read_csv(file_path)
    if df.empty:
        return df

    # Convert boolean columns
    for col in ('effective', 'run_terminated'):
        if col in df.columns and df[col].dtype != bool:
            df[col] = _coerce_flag_column(df[col])

    # Fill missing numeric values
    # NOTE(review): zero-filling treats a missing measurement as a real 0, which
    # pulls means (and the t-test) toward zero -- confirm this is intended.
    for col in df.select_dtypes(include=[np.number]).columns:
        df[col] = df[col].fillna(0)

    # Clip outliers to three standard deviations around the column mean
    for col in ('pre_timeout_oe', 'post_timeout_oe', 'efficiency_change'):
        if col in df.columns:
            mean = df[col].mean()
            std = df[col].std()
            df[col] = df[col].clip(mean - 3*std, mean + 3*std)

    # Create bins
    if 'run_points' in df.columns:
        # Left-closed intervals so each label matches its contents
        # ([6, 8) -> '6-7', [8, 10) -> '8-9', ...); the open upper edge keeps
        # very large runs in '20+' instead of dropping them to NaN.
        df['run_size_bin'] = pd.cut(
            df['run_points'],
            bins=[6, 8, 10, 12, 15, 20, np.inf],
            right=False,
            labels=['6-7', '8-9', '10-11', '12-14', '15-19', '20+'],
        )
    if 'score_diff' in df.columns:
        # Right-closed edges chosen for whole-point differentials (assumes
        # score_diff is integer-valued -- TODO confirm). A tied score gets its
        # own bucket rather than being counted as 'Down 1-4'.
        df['score_situation'] = pd.cut(
            df['score_diff'],
            bins=[-np.inf, -20, -10, -5, -1, 0, 4, 9, 19, np.inf],
            labels=['Down 20+', 'Down 10-19', 'Down 5-9', 'Down 1-4', 'Tied',
                    'Up 1-4', 'Up 5-9', 'Up 10-19', 'Up 20+'],
        )
    return df

# Load data
file_path = 'outputs/timeout_analysis_results.csv'
results_df = load_and_clean_data(file_path)
# load_and_clean_data reports a missing file by returning None. Stop here with a
# clear error rather than letting a later cell fail on None with an unrelated one.
if results_df is None:
    raise FileNotFoundError(f"Data file {file_path} not found")


## 3. Statistical Analysis

In [None]:
def perform_statistical_analysis(df):
    """Summarise timeout effectiveness and test whether efficiency changed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``effective`` column (summed to count effective
        timeouts) and an ``efficiency_change`` column.

    Returns
    -------
    dict
        ``'overall'`` holds ``total_timeouts``, ``effective_timeouts`` and
        ``effectiveness_rate``; ``'t_test'`` holds ``t_statistic``, ``p_value``
        and ``significant`` (p < 0.05) from a one-sample t-test of
        ``efficiency_change`` against 0. Values are plain Python scalars.
        The rate is NaN for an empty frame, and the t-test fields are NaN /
        False when fewer than two non-missing observations are available.

    Raises
    ------
    TypeError
        If ``df`` is None (e.g. the data file could not be loaded).
    """
    if df is None:
        raise TypeError("perform_statistical_analysis needs a DataFrame, got None")

    res = {}
    total = len(df)
    eff = int(df['effective'].sum())
    # Avoid dividing by zero when there are no rows at all.
    rate = eff / total if total else float('nan')
    res['overall'] = {'total_timeouts': total, 'effective_timeouts': eff, 'effectiveness_rate': rate}

    # A single NaN would turn the whole test result into NaN, so drop them first.
    changes = df['efficiency_change'].dropna()
    if len(changes) >= 2:
        t_stat, p_val = stats.ttest_1samp(changes, 0)
        t_stat, p_val = float(t_stat), float(p_val)
    else:
        # The t-test is undefined with fewer than two observations.
        t_stat = p_val = float('nan')
    # NaN < 0.05 is False, so an undefined test is never reported as significant.
    res['t_test'] = {'t_statistic': t_stat, 'p_value': p_val, 'significant': bool(p_val < 0.05)}
    return res

# Run the analysis on the loaded data. The bare name on the last line makes the
# notebook render the resulting dict as this cell's output.
analysis_results = perform_statistical_analysis(results_df)
analysis_results

## 4. Data Visualization

In [None]:
# Plot defaults shared by every figure below: seaborn's whitegrid theme and a
# 12x8 default figure size.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 8)})

# 4.1 Histogram
def efficiency_histogram():
    """Histogram of ``efficiency_change`` taken from the global ``results_df``.

    Draws 25 bins with a KDE overlay on a new figure.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the histogram.
    """
    changes = results_df['efficiency_change']
    figure, axes = plt.subplots()
    sns.histplot(data=changes, bins=25, kde=True, ax=axes)
    axes.set_title('Distribution of Efficiency Change')
    return figure

fig_hist = efficiency_histogram()
plt.show()

# 4.2 Box Plot by Quarter
def quarter_boxplot():
    """Box plot of ``efficiency_change`` grouped by ``quarter``.

    Reads the global ``results_df`` and draws on a new figure.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the box plot.
    """
    figure, axes = plt.subplots()
    sns.boxplot(
        data=results_df,
        x='quarter',
        y='efficiency_change',
        ax=axes,
    )
    axes.set_title('Efficiency Change by Quarter')
    return figure

fig_quarter = quarter_boxplot()
plt.show()

# 4.3 Box Plot by Run Size
def run_size_boxplot():
    """Box plot of ``efficiency_change`` grouped by ``run_size_bin``.

    Reads the global ``results_df``. When that frame has no ``run_size_bin``
    column nothing is drawn.

    Returns
    -------
    matplotlib.figure.Figure or None
        The new figure, or ``None`` when the column is absent.
    """
    if 'run_size_bin' not in results_df.columns:
        return None
    figure, axes = plt.subplots()
    sns.boxplot(
        data=results_df,
        x='run_size_bin',
        y='efficiency_change',
        ax=axes,
    )
    axes.set_title('Efficiency Change by Run Size')
    return figure

fig_run_size = run_size_boxplot()
plt.show()

# 4.5 Bar Chart: Effectiveness by Season
def season_effectiveness_bars():
    """Bar chart of the share of effective timeouts in each season.

    Reads the global ``results_df``, averages ``effective`` per ``season`` and
    plots that mean multiplied by 100.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the bar chart.
    """
    # Aggregate only the column that is plotted, so the chart does not depend on
    # unrelated columns being present in the data.
    rates = results_df.groupby('season')['effective'].mean()
    fig, ax = plt.subplots()
    ax.bar(rates.index, rates * 100)
    ax.set_xlabel('Season')
    ax.set_ylabel('Effectiveness Rate (%)')
    ax.set_title('Timeout Effectiveness by Season')
    return fig

fig_season = season_effectiveness_bars()
plt.show()

# 4.6 Stacked Bar: Run Termination by Season
def run_termination_by_season():
    """Stacked bars of terminated vs. continued runs for each season.

    Reads the global ``results_df``. Per season, the sum of ``run_terminated``
    gives the lower segment and the remaining non-null rows form the segment
    stacked on top of it.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the stacked bar chart.
    """
    per_season = results_df.groupby('season')['run_terminated']
    terminated = per_season.sum()
    continued = per_season.count() - terminated
    figure, axes = plt.subplots()
    axes.bar(terminated.index, terminated, label='Terminated')
    axes.bar(terminated.index, continued, bottom=terminated, label='Continued')
    axes.set_title('Run Termination by Season')
    axes.legend()
    return figure

fig_stacked = run_termination_by_season()
plt.show()

# 4.7 Pie Chart: Overall Effectiveness
def timeout_effectiveness_pie():
    """Pie chart splitting all timeouts into effective and ineffective shares.

    The effective share is the mean of ``effective`` in the global
    ``results_df``; the other slice is its complement.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the pie chart.
    """
    effective_share = results_df['effective'].mean()
    shares = [effective_share, 1 - effective_share]
    figure, axes = plt.subplots()
    axes.pie(shares, labels=['Effective', 'Ineffective'], autopct='%1.1f%%')
    axes.set_title('Overall Timeout Effectiveness')
    return figure

fig_pie = timeout_effectiveness_pie()
plt.show()

# 4.8 Pre vs Post Shooting Metrics
def pre_post_metrics_comparison():
    """Two-bar comparison of mean FG% before and after a timeout.

    Takes the means of ``pre_timeout_fg_pct`` and ``post_timeout_fg_pct`` from
    the global ``results_df`` and multiplies each by 100 for display
    (presumably the columns hold fractions in [0, 1] -- confirm).

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the bar chart.
    """
    pre_mean_pct = results_df['pre_timeout_fg_pct'].mean() * 100
    post_mean_pct = results_df['post_timeout_fg_pct'].mean() * 100
    figure, axes = plt.subplots()
    axes.bar(['Pre', 'Post'], [pre_mean_pct, post_mean_pct])
    axes.set_ylabel('FG%')
    axes.set_title('Pre vs Post Timeout FG%')
    return figure

fig_shooting = pre_post_metrics_comparison()
plt.show()

## 5. Conclusions and Report Saving

In [None]:
# Save summary report
summary = "Overall effectiveness rate: {:.1f}%".format(analysis_results['overall']['effectiveness_rate']*100)
with open('outputs/timeout_summary.txt', 'w', encoding='utf-8') as f:
    f.write(summary)
# Save notebook
import nbformat
# nbformat.reads('', 'json') always fails: an empty string is not a notebook
# document and 'json' is not a valid as_version. Build a valid v4 notebook that
# carries the summary instead, so this cell runs to completion.
# NOTE(review): this is a report notebook, not a copy of the running notebook; use
# the Jupyter UI or nbconvert if a full executed copy is required.
report_nb = nbformat.v4.new_notebook()
report_nb.cells.append(nbformat.v4.new_markdown_cell(summary))
nbformat.write(report_nb, 'outputs/timeout_analysis_full.ipynb')
print("Report and notebook saved.")