# DeepShot: Strategic Insights

In this notebook, we'll explore the strategic implications of our models, analyzing basketball shooting trends and team strategies.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import matplotlib.patches as patches

# Plot defaults: seaborn "whitegrid" theme and a 10x6 default figure size
sns.set_theme(style='whitegrid')
plt.rcParams.update({'figure.figsize': [10, 6]})

# Input and output locations, given relative to the notebook's working directory
processed_dir = Path('../data/processed')
features_dir = processed_dir.joinpath('features')
results_dir = Path('../results')
strategy_dir = results_dir.joinpath('strategy')

# Ensure each directory exists; missing parents are created and existing
# directories are left untouched.
for directory in (processed_dir, features_dir, results_dir, strategy_dir):
    directory.mkdir(parents=True, exist_ok=True)

## 1. Loading the Data

In [None]:
# Load the shot-level feature table
shots = pd.read_csv(features_dir / 'shots_with_features.csv')
print(f"Loaded {len(shots)} shots")

# Derive a 'season' label from 'game_date' when the file does not already have one
if 'season' not in shots.columns and 'game_date' in shots.columns:
    shots['game_date'] = pd.to_datetime(shots['game_date'])

    # Only rows with a usable date get a label, so missing dates stay missing
    # instead of becoming a bogus "nan-nan" season.
    game_dates = shots.loc[shots['game_date'].notna(), 'game_date']

    # Seasons are labelled "<start year>-<end year>". Games played from October
    # onwards belong to the season starting that calendar year; games in any
    # earlier month (January-September) belong to the season that started the
    # previous year.
    start_year = game_dates.dt.year - (game_dates.dt.month < 10).astype(int)

    # Column assignment aligns on the index, so rows without a date receive NaN.
    shots['season'] = start_year.astype(str) + '-' + (start_year + 1).astype(str)
    print("Added season information")

# Display available seasons (missing labels are skipped so sorting cannot fail
# on a mix of strings and NaN)
if 'season' in shots.columns:
    seasons = shots['season'].dropna().unique()
    print(f"Available seasons: {sorted(seasons)}")

## 2. Defining Shot Zones

In [None]:
def define_shot_zones(df):
    """Label every shot with a detailed and a simplified court zone.

    Zones are assigned purely from the ``shot_distance`` column. When that
    column is absent it is computed as the Euclidean norm of ``loc_x`` and
    ``loc_y``.

    Parameters
    ----------
    df : pandas.DataFrame
        Shot table containing either ``shot_distance`` or both ``loc_x`` and
        ``loc_y``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``shot_zone`` and ``simple_zone`` added (and
        ``shot_distance`` if it had to be computed). Shots whose distance is
        NaN match no condition and are labelled ``'Unknown'`` in both columns.

    Raises
    ------
    KeyError
        If ``shot_distance`` is absent and ``loc_x``/``loc_y`` are not both
        available to derive it.
    """
    df_zones = df.copy()

    if 'shot_distance' not in df_zones.columns:
        missing = [col for col in ('loc_x', 'loc_y') if col not in df_zones.columns]
        if missing:
            # Fail with an actionable message instead of a bare KeyError: 'shot_distance'
            raise KeyError(
                "define_shot_zones needs a 'shot_distance' column or both 'loc_x' and "
                f"'loc_y' to compute it; missing columns: {missing}"
            )
        # NOTE(review): this assumes loc_x/loc_y use the same unit as the zone
        # cutoffs below - confirm against the feature pipeline.
        df_zones['shot_distance'] = np.sqrt(df_zones['loc_x']**2 + df_zones['loc_y']**2)

    distance = df_zones['shot_distance']

    # NOTE(review): the three-point cutoff is a single distance (23.75), so any
    # three-point attempt taken from closer than that (e.g. a shorter corner
    # line) is labelled 'Long Mid-Range' / 'Mid-Range' here.

    # Detailed zones: upper bounds are inclusive, so the bands do not overlap
    conditions = [
        distance <= 4,                         # Restricted area
        (distance > 4) & (distance <= 8),      # Paint (non-RA)
        (distance > 8) & (distance <= 16),     # Mid-range
        (distance > 16) & (distance <= 23.75), # Long mid-range
        distance > 23.75                       # Three-point
    ]
    zone_names = ['Restricted Area', 'Paint (Non-RA)', 'Mid-Range', 'Long Mid-Range', 'Three-Point']
    df_zones['shot_zone'] = np.select(conditions, zone_names, default='Unknown')

    # Simplified zones: the two inside bands and the two mid-range bands are merged
    simple_conditions = [
        distance <= 8,                         # At Rim
        (distance > 8) & (distance <= 23.75),  # Mid-range
        distance > 23.75                       # Three-point
    ]
    simple_zone_names = ['At Rim', 'Mid-Range', 'Three-Point']
    df_zones['simple_zone'] = np.select(simple_conditions, simple_zone_names, default='Unknown')

    return df_zones

# Tag every shot with its detailed and simplified zone
shots_with_zones = define_shot_zones(shots)

# Number of shots in each detailed zone
print("Shot zone distribution:")
zone_counts = shots_with_zones.loc[:, 'shot_zone'].value_counts()
display(zone_counts)

# Number of shots in each simplified zone
print("\nSimplified shot zone distribution:")
simple_zone_counts = shots_with_zones.loc[:, 'simple_zone'].value_counts()
display(simple_zone_counts)

## 3. Analyzing Shot Distribution Trends

In [None]:
# Analyze how the share of shots taken from each simplified zone changes by season
if 'season' in shots_with_zones.columns:
    # Shots per (season, zone); a zone with no attempts in a season counts as 0, not NaN
    season_zone_dist = (
        shots_with_zones.groupby(['season', 'simple_zone']).size().unstack(fill_value=0)
    )

    # Convert each season's row to percentages of that season's total shots
    season_zone_pct = season_zone_dist.div(season_zone_dist.sum(axis=1), axis=0) * 100

    # Sort by season
    season_zone_pct = season_zone_pct.sort_index()

    # DataFrame.plot opens its own figure unless it is handed an Axes, so the
    # Axes is created explicitly; otherwise the requested size is ignored and
    # an empty figure is left behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    season_zone_pct.plot(kind='bar', stacked=True, colormap='viridis', rot=45, ax=ax)
    ax.set_title('Shot Distribution by Season', fontsize=16)
    ax.set_xlabel('Season')
    ax.set_ylabel('Percentage of Shots (%)')
    ax.legend(title='Shot Zone')
    ax.grid(axis='y', alpha=0.3)
    fig.tight_layout()
    plt.show()

    # Same data as lines, which makes the per-zone trend easier to follow
    fig, ax = plt.subplots(figsize=(12, 6))
    season_zone_pct.plot(kind='line', marker='o', colormap='viridis', ax=ax)
    ax.set_title('Shot Distribution Trends by Season', fontsize=16)
    ax.set_xlabel('Season')
    ax.set_ylabel('Percentage of Shots (%)')
    ax.legend(title='Shot Zone')
    ax.grid(alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Season information not available for trend analysis")

## 4. Analyzing Shot Efficiency by Zone

In [None]:
# Attempts, makes and field-goal percentage for each detailed zone
zone_efficiency = shots_with_zones.groupby('shot_zone').agg(
    total_shots=('shot_made', 'count'),
    made_shots=('shot_made', 'sum'),
    fg_pct=('shot_made', 'mean')
).reset_index()

# Expected points per attempt: a make is worth 3 in the 'Three-Point' zone and
# 2 in every other zone (including 'Unknown').
zone_efficiency['points_per_shot'] = np.where(
    zone_efficiency['shot_zone'] == 'Three-Point',
    zone_efficiency['fg_pct'] * 3,
    zone_efficiency['fg_pct'] * 2
)

# Most valuable zones first; the bar chart below follows this row order
zone_efficiency = zone_efficiency.sort_values('points_per_shot', ascending=False)

# Display zone efficiency
print("Shot efficiency by zone:")
display(zone_efficiency)

# Plot zone efficiency on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))
# Passing `palette` without `hue` is deprecated in recent seaborn releases, so
# the x variable is also assigned to `hue`. dodge=False keeps one full-width
# bar per zone.
sns.barplot(
    x='shot_zone',
    y='points_per_shot',
    hue='shot_zone',
    data=zone_efficiency,
    palette='viridis',
    dodge=False,
    ax=ax,
)
# The hue legend would only repeat the x tick labels; drop it if one was drawn
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
ax.set_title('Points Per Shot by Zone', fontsize=16)
ax.set_xlabel('Shot Zone')
ax.set_ylabel('Points Per Shot')
ax.grid(axis='y', alpha=0.3)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

## 5. Team Shot Distribution Analysis

In [None]:
# Compare how each team distributes its shots across the simplified zones
if 'team_id' in shots_with_zones.columns:
    # Group by 'team_name' when it is available so labels are readable;
    # otherwise fall back to 'team_id'.
    team_column = 'team_name' if 'team_name' in shots_with_zones.columns else 'team_id'

    # Shots per (team, zone); a team with no attempts in a zone counts as 0, not NaN
    team_zone_dist = (
        shots_with_zones.groupby([team_column, 'simple_zone']).size().unstack(fill_value=0)
    )

    # Convert each team's row to percentages of that team's total shots
    team_zone_pct = team_zone_dist.div(team_zone_dist.sum(axis=1), axis=0) * 100

    # Rank teams by the share of their shots that are three-point attempts
    if 'Three-Point' in team_zone_pct.columns:
        team_zone_pct = team_zone_pct.sort_values('Three-Point', ascending=False)

    # Display top and bottom teams
    print("Teams with highest three-point attempt rate:")
    display(team_zone_pct.head(5))

    print("\nTeams with lowest three-point attempt rate:")
    display(team_zone_pct.tail(5))

    # DataFrame.plot opens its own figure unless it is handed an Axes, so the
    # Axes is created explicitly; otherwise the requested size is ignored and
    # an empty figure is left behind.
    fig, ax = plt.subplots(figsize=(14, 8))
    team_zone_pct.head(10).plot(kind='bar', stacked=True, colormap='viridis', rot=45, ax=ax)
    ax.set_title('Shot Distribution by Team (Top 10 Three-Point Teams)', fontsize=16)
    ax.set_xlabel('Team')
    ax.set_ylabel('Percentage of Shots (%)')
    ax.legend(title='Shot Zone')
    ax.grid(axis='y', alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Team information not available for distribution analysis")

## 6. Player Shot Distribution Analysis

In [None]:
# Compare how individual players distribute their shots across the simplified zones
if 'player_id' in shots_with_zones.columns:
    # Group by 'player_name' when it is available so labels are readable;
    # otherwise fall back to 'player_id'.
    # NOTE(review): grouping by name merges distinct players who share a name.
    player_column = 'player_name' if 'player_name' in shots_with_zones.columns else 'player_id'

    # Keep only players with enough attempts for their percentages to be meaningful
    min_shots = 100
    player_shot_counts = shots_with_zones[player_column].value_counts()
    qualified_players = player_shot_counts[player_shot_counts >= min_shots].index

    # Filter shots for qualified players
    qualified_shots = shots_with_zones[shots_with_zones[player_column].isin(qualified_players)]

    # Shots per (player, zone); a player with no attempts in a zone counts as 0, not NaN
    player_zone_dist = (
        qualified_shots.groupby([player_column, 'simple_zone']).size().unstack(fill_value=0)
    )

    # Convert each player's row to percentages of that player's total shots
    player_zone_pct = player_zone_dist.div(player_zone_dist.sum(axis=1), axis=0) * 100

    # Players with the largest share of mid-range attempts
    if 'Mid-Range' in player_zone_pct.columns:
        midrange_specialists = player_zone_pct.sort_values('Mid-Range', ascending=False)

        print(f"Top midrange specialists (minimum {min_shots} shots):")
        display(midrange_specialists.head(10))

        # DataFrame.plot opens its own figure unless it is handed an Axes, so
        # the Axes is created explicitly; otherwise the requested size is
        # ignored and an empty figure is left behind.
        fig, ax = plt.subplots(figsize=(14, 8))
        midrange_specialists.head(10).plot(
            kind='bar', stacked=True, colormap='viridis', rot=45, ax=ax
        )
        ax.set_title('Shot Distribution of Top Midrange Specialists', fontsize=16)
        ax.set_xlabel('Player')
        ax.set_ylabel('Percentage of Shots (%)')
        ax.legend(title='Shot Zone')
        ax.grid(axis='y', alpha=0.3)
        fig.tight_layout()
        plt.show()

    # Players with the largest share of three-point attempts
    if 'Three-Point' in player_zone_pct.columns:
        three_point_specialists = player_zone_pct.sort_values('Three-Point', ascending=False)

        print(f"\nTop three-point specialists (minimum {min_shots} shots):")
        display(three_point_specialists.head(10))
else:
    print("Player information not available for distribution analysis")

## 7. Shot Success Rate Trends

In [None]:
# Analyze shot success rate and points per shot by season and simplified zone
if 'season' in shots_with_zones.columns:
    # Field-goal percentage per (season, zone), one column per zone, ordered by season
    season_zone_success = shots_with_zones.groupby(['season', 'simple_zone'])['shot_made'].mean()
    season_zone_success = season_zone_success.unstack().sort_index()

    # DataFrame.plot opens its own figure unless it is handed an Axes, so the
    # Axes is created explicitly; otherwise the requested size is ignored and
    # an empty figure is left behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    season_zone_success.plot(kind='line', marker='o', colormap='viridis', ax=ax)
    ax.set_title('Shot Success Rate Trends by Season and Zone', fontsize=16)
    ax.set_xlabel('Season')
    ax.set_ylabel('Field Goal Percentage')
    ax.legend(title='Shot Zone')
    ax.grid(alpha=0.3)
    fig.tight_layout()
    plt.show()

    # Points scored on each attempt: a make is worth 3 in the 'Three-Point'
    # zone and 2 everywhere else. Computed column-wise rather than with a
    # per-row Python function, which is very slow on a large shot table.
    shot_value = np.where(shots_with_zones['simple_zone'] == 'Three-Point', 3, 2)
    shots_with_zones['points'] = shots_with_zones['shot_made'] * shot_value

    # Average points per attempt per (season, zone), ordered by season
    season_zone_pps = shots_with_zones.groupby(['season', 'simple_zone'])['points'].mean()
    season_zone_pps = season_zone_pps.unstack().sort_index()

    # Plot points per shot trends
    fig, ax = plt.subplots(figsize=(12, 6))
    season_zone_pps.plot(kind='line', marker='o', colormap='viridis', ax=ax)
    ax.set_title('Points Per Shot Trends by Season and Zone', fontsize=16)
    ax.set_xlabel('Season')
    ax.set_ylabel('Points Per Shot')
    ax.legend(title='Shot Zone')
    ax.grid(alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("Season information not available for trend analysis")

## Key Strategic Insights

Our analysis of basketball shooting trends reveals several key strategic insights:

1. **The Three-Point Revolution**:
   - Three-point attempts have increased dramatically over time, while midrange shots have declined
   - This shift is driven by efficiency: three-pointers yield more points per shot than midrange jumpers
   - Teams have embraced this analytical insight, fundamentally changing basketball strategy

2. **Team Strategic Identities**:
   - Teams show distinct shot distribution patterns, revealing different strategic approaches
   - Some teams have fully embraced the three-point revolution, while others maintain more balanced approaches
   - These strategic differences create unique team identities and matchup considerations

3. **Player Specialization**:
   - Despite overall trends, certain players continue to specialize in midrange shooting
   - These specialists maintain their efficiency from midrange despite the general trend away from those shots
   - Player skill can sometimes transcend general efficiency patterns

4. **Efficiency Dynamics**:
   - Shot success rates have remained relatively stable over time despite changing shot distributions
   - This suggests defenses adapt to offensive trends, creating a strategic equilibrium
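   - The highest-value shots remain at the rim and from three-point range, particularly the corners (note that the zones defined in this notebook do not break out corner threes separately)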

5. **Strategic Implications**:
   - Optimal strategy involves maximizing high-efficiency shots (at rim and three-pointers)
   - However, maintaining some midrange threat may prevent defenses from overplaying other areas
   - The most successful teams balance analytical efficiency with tactical diversity

These insights demonstrate how basketball strategy has evolved through analytical understanding, changing the fundamental nature of how the game is played at the highest levels.