In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import json
import folium
from folium.plugins import MarkerCluster

In [3]:
#Load dataset
# The CSV location can be overridden with the NBA_COORDINATES_CSV environment
# variable so the notebook is not tied to one machine's folder layout; when the
# variable is unset the original path is used unchanged.
path = os.environ.get(
    'NBA_COORDINATES_CSV',
    '/Users/shantamaraj/Desktop/Basketball Analytics/Processed Data/nba_updated_coordinates.csv')
df = pd.read_csv(path)

#Preview the first rows
df.head()

Unnamed: 0,regular_season_index,season_start,season_end,season_type,player_id,rank,player,team_id,team,games_played,...,personal_fouls,total_points,assist_to_turnover_ratio,steal_to_turnover_ratio,playoff_index,country,team_full,state,latitude,longitude
0,0.0,2012,2013,Regular,201142,1,Kevin Durant,1610612760,OKC,81,...,143,2280,1.34,0.41,,United States of America,Oklahoma City Thunder,Oklahoma,35.4634,-97.5151
1,1.0,2012,2013,Regular,977,2,Kobe Bryant,1610612747,LAL,78,...,173,2133,1.63,0.37,,United States of America,Los Angeles Lakers,California,34.043,-118.2673
2,2.0,2012,2013,Regular,2544,3,LeBron James,1610612748,MIA,76,...,110,2036,2.44,0.57,,United States of America,Miami Heat,Florida,25.7814,-80.187
3,3.0,2012,2013,Regular,201935,4,James Harden,1610612745,HOU,78,...,178,2023,1.54,0.48,,United States of America,Houston Rockets,Texas,29.7508,-95.3621
4,4.0,2012,2013,Regular,2546,5,Carmelo Anthony,1610612752,NYK,67,...,205,1920,0.98,0.3,,United States of America,New York Knicks,New York,40.7505,-73.9934


In [6]:
#Dataset dimensions as (rows, columns)
df.shape

(8046, 37)

In [7]:
#Aggregate data by country, state and coordinates
#NOTE: latitude/longitude are grouping keys, so each distinct location gets its own row
#      (a state with several locations therefore appears more than once)
location_keys = ['country', 'state', 'latitude', 'longitude']
three_point_cols = ['3_point_feild_goals_attempted', '3_point_feild_goal_percentage']

state_summary = df.groupby(location_keys)[three_point_cols].mean().reset_index()

In [8]:
#Give the aggregated columns short names
short_names = {
    '3_point_feild_goals_attempted': 'avg_3pa',
    '3_point_feild_goal_percentage': 'avg_3p_pct',
}
state_summary = state_summary.rename(columns=short_names)

state_summary.head()

Unnamed: 0,country,state,latitude,longitude,avg_3pa,avg_3p_pct
0,Canada,Ontario,43.6435,-79.3791,99.687075,0.281299
1,United States of America,Arizona,33.4458,-112.0712,110.565217,0.272248
2,United States of America,California,34.043,-118.2673,99.6375,0.271632
3,United States of America,California,37.768,-122.3877,112.681818,0.265259
4,United States of America,California,38.5802,-121.4998,124.845745,0.266176


In [18]:
import branca.colormap as cm

#Base map with a fixed centre and zoom level
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4, tiles='CartoDB positron')

#Colour scale for 3P% accuracy, spanning the observed minimum and maximum
state_pct = state_summary['avg_3p_pct']
colormap = cm.LinearColormap(
    colors=['orange', 'yellow', 'lightgreen', 'green'],
    vmin=state_pct.min(),
    vmax=state_pct.max(),
    caption='Average 3-Point Accuracy (3P%)')

#One circle per summary row: colour encodes 3P%, radius encodes average attempts
for rec in state_summary.to_dict('records'):
    shade = colormap(rec['avg_3p_pct'])
    popup_html = (
        f"<b>{rec['state']}, {rec['country']}</b><br>"
        f"Avg 3PA: {rec['avg_3pa']:.1f}<br>"
        f"Avg 3P%: {rec['avg_3p_pct']:.2%}")
    marker = folium.CircleMarker(
        location=[rec['latitude'], rec['longitude']],
        radius=rec['avg_3pa'] / 10,  # scaled by average attempts
        color=shade,
        fill=True,
        fill_color=shade,
        fill_opacity=0.7,
        popup=popup_html)
    marker.add_to(m)

#Attach the colour legend to the map
colormap.add_to(m)

m

### Geospatial Visualization: 3-Point Accuracy Across US & Canada (2012–2024)

This map visualizes the geographic distribution of average 3-point shooting performance across the US and Canada between 2012 and 2024.  
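Each circle marks one distinct location (country, state, latitude, longitude), so a state with several team locations — such as California — appears as several circles. Each circle’s **size** represents the average number of 3-point attempts per player record (3PA) at that location, while its **color** indicates average shooting accuracy (3P%), ranging from orange (lower accuracy) through yellow and light green to green (higher accuracy).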

The visualization highlights several key insights:
- **High-performance regions** such as **California**, **Texas**, and **Ontario (Canada)** exhibit both large and green circles, suggesting a combination of high volume and efficiency in 3-point shooting.
- **Central and southeastern U.S. states** show smaller, more orange markers, indicating lower accuracy or fewer 3-point attempts overall.
- The **geographic spread** of 3-point proficiency illustrates how the modern NBA’s perimeter-oriented offense has expanded beyond traditional coastal markets to influence nearly every team in North America.

Overall, this map reinforces how the 3-point shot has become a defining strategic element of basketball across regions, merging efficiency and volume as core indicators of offensive evolution.

In [12]:
#Aggregate data by team (keyed on country, state, team name and coordinates)
team_keys = ['country', 'state', 'team_full', 'latitude', 'longitude']
shot_cols = ['3_point_feild_goals_attempted', '3_point_feild_goal_percentage']
team_summary = df.groupby(team_keys)[shot_cols].mean().reset_index()

#Give the aggregated columns short names
team_summary = team_summary.rename(columns={
    '3_point_feild_goals_attempted': 'avg_3pa',
    '3_point_feild_goal_percentage': 'avg_3p_pct',
})

team_summary.head()

Unnamed: 0,country,state,team_full,latitude,longitude,avg_3pa,avg_3p_pct
0,Canada,Ontario,Toronto Raptors,43.6435,-79.3791,99.687075,0.281299
1,United States of America,Arizona,Phoenix Suns,33.4458,-112.0712,110.565217,0.272248
2,United States of America,California,Golden State Warriors,37.768,-122.3877,112.681818,0.265259
3,United States of America,California,LA Clippers,34.043,-118.2673,94.138264,0.28036
4,United States of America,California,Los Angeles Lakers,34.043,-118.2673,106.506024,0.260731


In [14]:

#Base map for the team-level view
m_team = folium.Map(location=[39.8283, -98.5795], zoom_start=4, tiles='CartoDB positron')

#Colour scale for 3P%, spanning the observed team minimum and maximum
team_pct = team_summary['avg_3p_pct']
colormap = cm.LinearColormap(
    colors=['orange', 'yellow', 'lightgreen', 'green'],
    vmin=team_pct.min(),
    vmax=team_pct.max(),
    caption='Average 3-Point Accuracy (3P%)')

#One circle per team: colour encodes 3P%, radius encodes average attempts
for rec in team_summary.to_dict('records'):
    shade = colormap(rec['avg_3p_pct'])
    popup_html = (
        f"<b>{rec['team_full']}</b><br>"
        f"{rec['state']}, {rec['country']}<br>"
        f"Avg 3PA: {rec['avg_3pa']:.1f}<br>"
        f"Avg 3P%: {rec['avg_3p_pct']:.2%}")
    marker = folium.CircleMarker(
        location=[rec['latitude'], rec['longitude']],
        radius=rec['avg_3pa'] / 10,  # scaled by average attempts
        color=shade,
        fill=True,
        fill_color=shade,
        fill_opacity=0.7,
        popup=popup_html)
    marker.add_to(m_team)

#Attach the colour legend to the map
colormap.add_to(m_team)

m_team

### Team-Level 3-Point Accuracy and Attempt Distribution (2012–2024)

This map visualizes the **average 3-point accuracy (3P%)** and **attempt volume (3PA)** across all NBA teams in the United States and Canada between 2012 and 2024. Each circle represents one NBA franchise, positioned at its geographic coordinates.

- **Color** reflects shooting accuracy — green tones indicate higher 3-point efficiency, while yellow to orange tones show lower accuracy.
- **Circle size** represents average 3-point attempts per player — larger circles correspond to higher shot volumes.

From this map, we can see that:
- Teams like the **Golden State Warriors (California)**, **Houston Rockets (Texas)**, and **Toronto Raptors (Canada)** stand out with both high accuracy and attempt rates, illustrating their role in driving the “3-point revolution.”
- Teams in the Midwest and Southeast show moderate accuracy, suggesting more traditional offensive styles.
- The overall geographic spread highlights how the 3-point shot has become a **universal element of modern basketball**, transcending regional playstyles.

This visualization provides a clear snapshot of how each team contributes to the league’s long-term shift toward perimeter-oriented offense.

In [20]:
#Create small random offsets for every team so markers at shared coordinates separate
#The seed is reset here, so re-running this cell reproduces the same offsets
np.random.seed(42)
n_teams = len(team_summary)
lat_offsets = np.random.uniform(-0.3, 0.3, n_teams)  # drawn first, as in the latitude column
lon_offsets = np.random.uniform(-0.3, 0.3, n_teams)  # drawn second, for the longitude column
team_summary['latitude_jitter'] = team_summary['latitude'] + lat_offsets
team_summary['longitude_jitter'] = team_summary['longitude'] + lon_offsets

#Rebuild the map with jittered coordinates
m_team_jitter = folium.Map(location=[39.8283, -98.5795], zoom_start=4, tiles='CartoDB positron')

#Colour scale for 3P%, spanning the observed team minimum and maximum
team_pct = team_summary['avg_3p_pct']
colormap = cm.LinearColormap(
    colors=['orange', 'yellow', 'lightgreen', 'green'],
    vmin=team_pct.min(),
    vmax=team_pct.max(),
    caption='Average 3-Point Accuracy (3P%)')

#One circle per team, placed at its jittered position
for rec in team_summary.to_dict('records'):
    shade = colormap(rec['avg_3p_pct'])
    popup_html = (
        f"<b>{rec['team_full']}</b><br>"
        f"{rec['state']}, {rec['country']}<br>"
        f"Avg 3PA: {rec['avg_3pa']:.1f}<br>"
        f"Avg 3P%: {rec['avg_3p_pct']:.2%}")
    marker = folium.CircleMarker(
        location=[rec['latitude_jitter'], rec['longitude_jitter']],
        radius=rec['avg_3pa'] / 10,
        color=shade,
        fill=True,
        fill_color=shade,
        fill_opacity=0.7,
        popup=popup_html)
    marker.add_to(m_team_jitter)

colormap.add_to(m_team_jitter)
m_team_jitter

### Adjusted Team-Level Map: Resolving City Overlaps

Because some states and metropolitan areas (such as **California**, **Texas**, and **New York**) host multiple NBA teams, their markers sat on top of or very close to one another on the previous map — for example, the Lakers and Clippers share identical coordinates in this dataset — making it difficult to distinguish franchises in the same city.

This updated visualization adds a **small coordinate offset (“jitter”)** to each team’s location.  
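Each marker is shifted by a random offset of at most 0.3° in latitude and in longitude, so the map remains approximately (though no longer exactly) geographically accurate, while the jitter separates teams slightly to improve visual clarity and allow each franchise’s 3-point profile to be seen individually.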

- Circles now appear as **distinct clusters** in multi-team markets such as Los Angeles (Lakers, Clippers) and New York (Knicks, Nets).
- The **color scale** still represents 3-point shooting accuracy (3P%), while **circle size** continues to show average attempt volume (3PA).

This refined version emphasizes the **regional clustering of 3-point performance**, illustrating how certain markets dominate perimeter play while others remain more balanced or conservative in shot selection.

In [23]:
#Save the three maps as standalone HTML files
# The output folder can be overridden with the NBA_VIZ_DIR environment variable;
# when the variable is unset the original folder is used.
path2 = os.environ.get(
    'NBA_VIZ_DIR',
    '/Users/shantamaraj/Desktop/Basketball Analytics/Visualizations/')

#Create the folder if it is missing so the saves below do not fail on a fresh checkout
os.makedirs(path2, exist_ok=True)

#os.path.join yields the same file paths whether or not path2 ends with a separator
m.save(os.path.join(path2, "nba_state_3point_distribution.html"))
m_team.save(os.path.join(path2, "nba_team_3point_accuracy.html"))
m_team_jitter.save(os.path.join(path2, "nba_team_3point_jittered.html"))