# NBA Timeout Effect Analysis with Detailed Code

This notebook provides a comprehensive walkthrough of the NBA timeout effect analysis pipeline, highlighting key code fragments used at each stage.

It covers five stages: data collection, data loading and cleaning, statistical analysis, visualization, and writing a summary report.

## Setup & Dependencies

In [None]:
import os
import time
import random
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Silence every warning category for the rest of the kernel session.
import warnings
warnings.filterwarnings(action='ignore')

# Make sure both output folders exist; exist_ok=True keeps a re-run of
# this cell from failing when they are already there.
for _output_dir in ('outputs/data', 'outputs/figures'):
    os.makedirs(_output_dir, exist_ok=True)

## 1. Data Collection

In [None]:
# data_collector.py snippets

from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder, playbyplayv2

def patch_nba_api(timeout=120):
    """Override the ``TIMEOUT`` attribute on nba_api's stats HTTP module.

    Args:
        timeout: Value assigned to ``nba_api.stats.library.http.TIMEOUT``.
            Defaults to 120, the value this notebook originally hard-coded
            (presumably seconds -- confirm against nba_api).

    Returns:
        None. The function only mutates module-level state.

    NOTE(review): this only changes request behaviour if the installed
    nba_api version actually reads a module-level ``TIMEOUT``; verify, and
    consider passing a per-request timeout to the endpoints instead if it
    does not.
    """
    # Imported here so that defining this function does not itself require
    # nba_api's HTTP layer to be importable.
    from nba_api.stats.library import http
    http.TIMEOUT = timeout

def create_session():
    """Build a ``requests.Session`` that retries failed requests.

    A single ``HTTPAdapter`` is mounted for both the HTTPS and HTTP
    prefixes. Its retry policy allows up to 5 retries with a backoff
    factor of 2 and also retries on status codes 429, 500, 502, 503, 504.

    Returns:
        requests.Session: The configured session.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=2,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    http_session = requests.Session()
    # Same mount order as before: HTTPS first, then plain HTTP.
    for scheme_prefix in ('https://', 'http://'):
        http_session.mount(scheme_prefix, retrying_adapter)
    return http_session


In [None]:
def api_call_with_retry(func, *args, max_retries=5, base_delay=1, **kwargs):
    """Call ``func`` and retry with exponential backoff whenever it raises.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        max_retries: Maximum number of attempts before giving up.
        base_delay: Seconds to wait after the first failure; the wait
            doubles after every further failure.
        **kwargs: Keyword arguments forwarded to ``func``. The names
            ``max_retries`` and ``base_delay`` are consumed by this helper
            and therefore cannot be forwarded.

    Returns:
        Whatever ``func`` returns on its first successful attempt.

    Raises:
        RuntimeError: If every attempt raised. The exception from the final
            attempt is attached as ``__cause__`` so the real failure is not
            lost.
    """
    delay = base_delay
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            last_error = exc
            print(f'Retry {attempt}/{max_retries} after error: {exc}')
            # Only wait when another attempt will follow; sleeping after
            # the final failure would just delay the error.
            if attempt < max_retries:
                time.sleep(delay)
                delay *= 2
    raise RuntimeError('Max API retries exceeded') from last_error


In [None]:
# Find and analyze timeouts function
def find_and_analyze_timeouts(seasons, max_games=None):
    """Fetch play-by-play data per game and record one result row per game.

    Args:
        seasons: Iterable of season identifiers handed to the game finder
            (the notebook passes strings such as '2022-23').
        max_games: Optional cap on the number of distinct game IDs processed
            per season; ``None`` processes all of them.

    Returns:
        list[dict]: One dict per processed game with the keys ``season``,
        ``game_id``, ``pre_timeout_oe``, ``post_timeout_oe``,
        ``efficiency_change`` and ``effective``.

    Raises:
        RuntimeError: Propagated from ``api_call_with_retry`` when an
            endpoint keeps failing.

    NOTE: the metric values written below are hard-coded placeholders; the
    play-by-play frame is fetched but the real analysis is not implemented
    in this snippet.
    """
    results = []
    for season in seasons:
        # LeagueGameFinder names its season filter ``season_nullable``; a
        # plain ``season=`` keyword is rejected by the endpoint constructor.
        # The lambda is invoked immediately, so capturing the loop variable
        # is safe.
        games = api_call_with_retry(
            lambda: leaguegamefinder.LeagueGameFinder(
                season_nullable=season
            ).get_data_frames()[0]
        )
        # unique() de-duplicates game IDs that appear on more than one row;
        # slicing with None keeps every ID.
        for game_id in games['GAME_ID'].unique()[:max_games]:
            pbp = api_call_with_retry(
                lambda: playbyplayv2.PlayByPlayV2(
                    game_id=game_id
                ).get_data_frames()[0]
            )
            # Extract scoring runs and timeout events
            # ... analysis logic here ...
            results.append({
                'season': season,
                'game_id': game_id,
                'pre_timeout_oe': 1.02,
                'post_timeout_oe': 0.98,
                'efficiency_change': -0.04,
                'effective': True
            })
    return results


In [None]:
# Run collection
# Apply the nba_api timeout override before any endpoint is called.
patch_nba_api()
# NOTE(review): `session` is not passed to any call in the cells shown
# here -- confirm whether the nba_api requests are meant to go through it.
session = create_session()
seasons = ['1996-97','1999-00','2004-05','2010-11','2016-17','2022-23']
# max_games=100 caps the number of distinct games processed per season.
timeout_results = find_and_analyze_timeouts(seasons, max_games=100)
# Persist the rows so the cleaning step can reload them from disk.
df_raw = pd.DataFrame(timeout_results)
df_raw.to_csv('outputs/data/timeout_analysis_results.csv', index=False)


## 2. Data Loading & Cleaning

In [None]:
# data_analyzer.py snippet: load_and_clean_data
def load_and_clean_data(path):
    """Load the timeout results CSV and apply basic cleaning.

    Cleaning steps, in order:
      1. Cast ``effective`` to bool.
      2. Fill missing values in every numeric column with that column's mean.
      3. Clip ``pre_timeout_oe``, ``post_timeout_oe`` and
         ``efficiency_change`` to mean +/- 3 standard deviations.
      4. Add ``run_size_bin``: five equal-width bins of ``efficiency_change``.

    Args:
        path: Location of the CSV file to read.

    Returns:
        pandas.DataFrame: The cleaned frame, including ``run_size_bin``.
    """
    # Game IDs are identifiers, not quantities. Reading them as text keeps
    # their leading zeros and keeps them out of the numeric mean-imputation
    # below.
    df = pd.read_csv(path, dtype={'game_id': str})
    # Convert types
    df['effective'] = df['effective'].astype(bool)
    # Fill NAs. Assign the result back: ``df[col].fillna(..., inplace=True)``
    # operates on a temporary under pandas Copy-on-Write and would leave the
    # frame unchanged.
    for col in df.select_dtypes(include=[np.number]).columns:
        df[col] = df[col].fillna(df[col].mean())
    # Clip outliers
    for col in ['pre_timeout_oe', 'post_timeout_oe', 'efficiency_change']:
        mean, std = df[col].mean(), df[col].std()
        df[col] = df[col].clip(mean - 3 * std, mean + 3 * std)
    # NOTE(review): the column is named ``run_size_bin`` but it bins
    # ``efficiency_change`` -- confirm this is the intended source column.
    df['run_size_bin'] = pd.cut(df['efficiency_change'], bins=5)
    return df


In [None]:
# Reload the CSV written by the collection step, clean it, and preview the
# first rows.
results_df = load_and_clean_data('outputs/data/timeout_analysis_results.csv')
print(results_df.head())

## 3. Statistical Analysis

In [None]:
def perform_statistical_analysis(df):
    """Summarise timeout effectiveness and run two t-tests on efficiency.

    Args:
        df: DataFrame providing the columns ``effective``,
            ``efficiency_change``, ``pre_timeout_oe`` and
            ``post_timeout_oe``.

    Returns:
        dict: With three entries --
            ``effectiveness_rate``: mean of the ``effective`` column;
            ``one_sample``: ``(t statistic, p-value)`` tuple from a
            one-sample t-test of ``efficiency_change`` against 0;
            ``paired``: the result object of a paired t-test between
            ``pre_timeout_oe`` and ``post_timeout_oe``.
    """
    summary = {'effectiveness_rate': df['effective'].mean()}

    change_statistic, change_pvalue = stats.ttest_1samp(df['efficiency_change'], 0)
    summary['one_sample'] = (change_statistic, change_pvalue)

    summary['paired'] = stats.ttest_rel(df['pre_timeout_oe'], df['post_timeout_oe'])
    return summary


In [None]:
# Run the statistical tests on the cleaned results and print the headline
# numbers. The 'paired' entry is computed as well but is not printed here.
analysis = perform_statistical_analysis(results_df)
print(f"Effectiveness rate: {analysis['effectiveness_rate']*100:.1f}%")
print(f"One-sample t-test: t={analysis['one_sample'][0]:.3f}, p={analysis['one_sample'][1]:.2e}")

## 4. Data Visualization

In [None]:
# Distribution of the efficiency change, drawn as a histogram with a KDE
# overlay on an explicitly created 8x5 axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(results_df['efficiency_change'], kde=True, ax=ax)
ax.set_title('Efficiency Change Distribution')
plt.show()

In [None]:
# Boxplot by quarter
# The collection step in this notebook does not write a 'quarter' column,
# so check for it first: plotting a missing column raises and would stop a
# Restart & Run All.
if 'quarter' in results_df.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(x='quarter', y='efficiency_change', data=results_df, ax=ax)
    ax.set_title('Efficiency Change by Quarter')
    plt.show()
else:
    print("Skipping quarter boxplot: results_df has no 'quarter' column.")

In [None]:
# Pre- vs post-timeout efficiency, one point per row of results_df.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(
    results_df['pre_timeout_oe'],
    results_df['post_timeout_oe'],
    alpha=0.5,
)
# Dashed red y = x reference line drawn from 0 to 2 on both axes.
ax.plot([0, 2], [0, 2], 'r--')
ax.set_xlabel('Pre Timeout OE')
ax.set_ylabel('Post Timeout OE')
ax.set_title('Pre vs Post Timeout Efficiency')
plt.show()

## 5. Summary & Save

In [None]:
# Write the headline statistics to a plain-text summary file.
summary_lines = [
    f"Effectiveness: {analysis['effectiveness_rate']*100:.1f}%\n",
    f"One-sample t-test: t={analysis['one_sample'][0]:.3f}, p={analysis['one_sample'][1]:.2e}\n",
]
with open('outputs/data/summary.txt', 'w') as f:
    f.writelines(summary_lines)
print('Summary saved.')