In [9]:
import pandas as pd

# Shot-level data covering both the regular season and the playoffs.
# Free throws are not recorded in this file.
df = pd.read_csv(filepath_or_buffer="PLAYOFFS_SEASON_CEBL_SHOT_DATA.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,r,x,y,p,pno,tno,per,perType,actionType,actionNumber,previousAction,subType,player,shirtNumber,season,team_name
0,0,0,26.83,40.98,14,14,1,1,REGULAR,2pt,7,,jumpshot,T. Daniels,30,2019,Edmonton Stingers
1,1,0,19.030001,22.049999,4,4,1,1,REGULAR,2pt,11,,jumpshot,M. Gueye,3,2019,Edmonton Stingers
2,2,0,9.05,20.639999,1,1,1,1,REGULAR,2pt,15,,jumpshot,X. Moon,2,2019,Edmonton Stingers
3,3,0,33.849998,76.849998,2,2,1,1,REGULAR,3pt,17,,jumpshot,A. Ellis,12,2019,Edmonton Stingers
4,4,0,5.93,47.189999,2,2,1,1,REGULAR,2pt,21,,layup,A. Ellis,12,2019,Edmonton Stingers


In [10]:
df_transformed = df.copy()

# Fold every shot onto one half of the court by mirroring shots whose x is past
# the midpoint (x > 50) through the court centre: (x, y) -> (100 - x, 100 - y).
# NOTE(review): this assumes raw x/y are on a 0-100 scale - confirm against the data source.
#
# The mask is computed once, BEFORE x is rewritten. Previously x was folded
# first, so the later `row["x"] > 50` test could never be true and y was never
# mirrored.
far_half = df_transformed["x"] > 50
df_transformed["x"] = df_transformed["x"].where(~far_half, 100 - df_transformed["x"])
df_transformed["y"] = df_transformed["y"].where(~far_half, 100 - df_transformed["y"])

# Keep only the columns used downstream; .copy() makes the result an independent
# frame so later column assignments do not act on a slice of the original.
df_transformed = df_transformed[
    [
        "r",
        "x",
        "y",
        "per",
        "actionType",
        "subType",
        "player",
        "shirtNumber",
        "season",
        "team_name",
    ]
].copy()


In [11]:
from mplbasketball.utils import transform

# Target layout passed to mplbasketball (also reused when the court is drawn).
ORIENTATION = "vu"
ORIGIN = "bottom-left"

# Per-axis multipliers applied to the raw coordinates before the transform.
# NOTE(review): presumably maps a 0-100 scale onto a 94 x 50 court - confirm.
SCALE_X = 0.94
SCALE_Y = 0.5

# NOTE: this cell rewrites x/y in place, so running it twice scales and
# transforms the coordinates twice. Re-run the previous cell first.
df_transformed["x"] = df_transformed["x"].mul(SCALE_X)
df_transformed["y"] = df_transformed["y"].mul(SCALE_Y)

# Re-express the points from the "hl" layout in the ORIENTATION layout.
df_transformed["x"], df_transformed["y"] = transform(
    df_transformed["x"],  # type: ignore
    df_transformed["y"],  # type: ignore
    fr="hl",
    to=ORIENTATION,  # type: ignore
    origin=ORIGIN,
)

In [12]:
import numpy as np

# Bounds of the plotted half court in the transformed coordinate system.
X_START = -50
X_END = 0
Y_START = 48
Y_END = 94

TOP_OF_KEY_RADIUS = -0.1


def generate_smooth_arc(
    start_x: float,
    start_y: float,
    end_x: float,
    end_y: float,
    center_x: float,
    center_y: float,
    radius: float,
    num_points: int = 100,
) -> list[tuple[float, float]]:
    """
    Build a polyline made of the start point, a sampled circular arc, and the end point.

    The start and end points are used only to derive the angular span of the
    arc (their angles as seen from the center). The arc itself is sampled on
    the circle of the given radius around the center, always sweeping from the
    smaller of the two angles to the larger one. The start and end points are
    not required to lie on that circle; they are simply prepended and appended.

    Parameters:
    - start_x, start_y: First point of the returned polyline
    - end_x, end_y: Last point of the returned polyline
    - center_x, center_y: Center of the circle the arc is sampled from
    - radius: Radius used for the sampled points (a negative value places each
      sample on the opposite side of the center)
    - num_points: Number of samples taken along the arc

    Returns:
    - List of num_points + 2 (x, y) tuples: start point, arc samples, end point
    """
    # Angles of the two anchor points as seen from the center.
    angle_a = np.arctan2(start_y - center_y, start_x - center_x)
    angle_b = np.arctan2(end_y - center_y, end_x - center_x)

    # Always sweep from the smaller angle to the larger one.
    low, high = (angle_b, angle_a) if angle_a > angle_b else (angle_a, angle_b)
    sweep = np.linspace(low, high, num_points)

    # Sample the circle at each angle of the sweep.
    xs = center_x + radius * np.cos(sweep)
    ys = center_y + radius * np.sin(sweep)

    return [(start_x, start_y), *zip(xs, ys), (end_x, end_y)]


# Boundary polylines for the two top-of-key zones. Both use the same circle
# center (-25, Y_END - 29) and a small negative radius; the anchor points set
# where each polyline starts and ends.
top_key_arc = generate_smooth_arc(
    X_START + 17,
    Y_END - 27,
    X_END - 17,
    Y_END - 27,
    center_x=-25,
    center_y=Y_END - 29,
    radius=-0.5,
)

top_of_key_midrange_arc = generate_smooth_arc(
    X_START + 17 - 6.7,
    Y_END - 25,
    X_END - 17 + 6.7,
    Y_END - 25,
    center_x=-25,
    center_y=Y_END - 29,
    radius=-0.4,
)

# Shot zones, keyed by zone name. Each entry holds:
#   "coords": polygon vertices as (x, y) tuples, in drawing order
#   "color":  a colour name associated with the zone
# All vertices are expressed relative to X_START / X_END / Y_START / Y_END.
# NOTE: some polygons overlap (e.g. the apex of "top_key_midrange" at
# Y_END - 9.5 lies inside "restricted_area"). The classifier below stops at the
# first polygon that contains a shot, so the insertion order of this dict
# decides which zone wins in an overlap.
zone_map: dict[str, dict[str, list[tuple[float, float]] | str]] = {
    # 3-wide, 14-deep rectangle along the X_START edge, starting at Y_END.
    "left_corner": {
        "coords": [
            (X_START, Y_END),
            (X_START + 3, Y_END),
            (X_START + 3, Y_END - 14),
            (X_START, Y_END - 14),
        ],
        "color": "green",
    },
    # Region between the left corner strip and the central rectangle, extending
    # inward to x = -25 and down to (X_START + 11, Y_END - 24).
    "left_short_corner": {
        "coords": [
            (X_START + 3, Y_END),
            (X_START + 17, Y_END),
            (X_START + 17, Y_END - 10),
            (-25, Y_END - 10),
            (
                X_START + 17 - 6,
                Y_END - 24,
            ),
            (X_START + 3, Y_END - 14),
        ],
        "color": "yellow",
    },
    # Mirror image of "left_corner" along the X_END edge.
    "right_corner": {
        "coords": [
            (X_END - 3, Y_END),
            (X_END, Y_END),
            (X_END, Y_END - 14),
            (X_END - 3, Y_END - 14),
        ],
        "color": "green",
    },
    # Mirror image of "left_short_corner".
    "right_short_corner": {
        "coords": [
            (X_END - 17, Y_END),
            (X_END - 3, Y_END),
            (X_END - 3, Y_END - 14),
            (X_END - 17 + 6, Y_END - 24),
            (-25, Y_END - 10),
            (X_END - 17, Y_END - 10),
        ],
        "color": "yellow",
    },
    # Left side below the corner strip, reaching down past Y_START
    # (to Y_START - 1 / Y_START - 2).
    "left_wing_three": {
        "coords": [
            (X_START, Y_END - 14),
            (X_START + 3, Y_END - 14),
            (X_START + 17, Y_END - 27),
            (X_START + 17 - 6, Y_START - 1),
            (X_START, Y_START - 2),
        ],
        "color": "orange",
    },
    # Mirror image of "left_wing_three".
    "right_wing_three": {
        "coords": [
            (X_END - 17, Y_END - 27),
            (X_END - 3, Y_END - 14),
            (X_END, Y_END - 14),
            (X_END, Y_START - 2),
            (X_END - 17 + 6, Y_START - 1),
        ],
        "color": "orange",
    },
    # Central region between the two wing zones: the top_key_arc polyline on
    # top, closed along y = Y_START - 1.
    "top_key_three": {
        "coords": [
            *top_key_arc,
            (X_END - 17 + 6, Y_START - 1),
            (X_START + 17 - 6, Y_START - 1),
        ],
        "color": "red",
    },
    # 16-wide, 10-deep rectangle centred on x = -25, starting at Y_END.
    "restricted_area": {
        "coords": [
            (X_START + 17, Y_END),
            (X_END - 17, Y_END),
            (X_END - 17, Y_END - 10),
            (X_START + 17, Y_END - 10),
        ],
        "color": "blue",
    },
    # The top_of_key_midrange_arc polyline closed by a single apex point at
    # (-25, Y_END - 9.5).
    "top_key_midrange": {
        "coords": [
            *top_of_key_midrange_arc,
            (-25, Y_END - 9.5),
            # The polyline already begins and ends with these two points, so
            # they are not repeated here:
            # (X_START + 17 - 6.7, Y_END - 25),
            # (X_END - 17 + 6.7, Y_END - 25),
        ],
        "color": "black",
    },
}

In [13]:
import os

import pandas as pd
from shapely.geometry import Point, Polygon

# Output directory for the CSV reports written by this cell.
os.makedirs("zone_top_scorers", exist_ok=True)


# One shapely polygon per zone, in zone_map order. Order matters: a shot is
# assigned to the FIRST polygon that contains it.
zone_polygons = {zone: Polygon(data["coords"]) for zone, data in zone_map.items()}


# Classify shots into zones.
# Shots that fall inside no polygon (or exactly on a polygon boundary, since
# `contains` excludes the boundary) are silently left out of the result.
shot_columns = [
    "x",
    "y",
    "player",
    "team_name",
    "season",
    "r",
    "actionType",
    "subType",
]

shot_zones = []

# Iterate over the needed columns in lockstep instead of DataFrame.iterrows(),
# which builds a full Series object for every row.
for x, y, player, team_name, season, made, action_type, sub_type in zip(
    *(df_transformed[column] for column in shot_columns)
):
    pt = Point(x, y)

    zone_name = next(
        (name for name, poly in zone_polygons.items() if poly.contains(pt)),
        None,
    )
    if zone_name is None:
        continue

    shot_zones.append(
        (player, team_name, str(season), zone_name, made, action_type, sub_type)
    )


# One row per classified shot.
zone_df = pd.DataFrame(
    shot_zones,
    columns=["player", "team", "season", "zone", "made", "actionType", "subType"],
)


# Points scored per shot: 3 for a made "3pt" attempt, 2 for any other made
# attempt, 0 for a miss. Vectorised so it also works when zone_df is empty
# (a row-wise apply on an empty frame does not return a Series).
zone_df["points"] = (
    zone_df["actionType"].eq("3pt").map({True: 3, False: 2}) * zone_df["made"]
)


# Per (player, team, zone) totals. Named aggregation gives the output columns
# their final names directly instead of renaming a MultiIndex by position.
zone_stats = zone_df.groupby(["player", "team", "zone"], as_index=False).agg(
    total_points=("points", "sum"),
    total_attempts=("made", "count"),
    made_shots=("made", "sum"),
)

# 0/0 (no counted attempts) yields NaN; report that as 0.
zone_stats["fg_percentage"] = (
    zone_stats["made_shots"] / zone_stats["total_attempts"]
).fillna(0)


teams = zone_stats["team"].unique()

zones = zone_stats["zone"].unique()


# Process and save team-specific top scorers.
# Frames are collected in lists and concatenated once, rather than growing a
# DataFrame with pd.concat on every iteration starting from an empty frame.
all_top_frames = []

for team in teams:
    team_data = zone_stats[zone_stats["team"] == team]

    team_top_frames = []

    for zone in zones:
        team_zone_data = team_data[team_data["zone"] == zone]

        if team_zone_data.empty:
            continue

        # nlargest returns every row when fewer than 3 players are available.
        team_top_frames.append(team_zone_data.nlargest(3, "total_points"))

    if not team_top_frames:
        continue

    all_top_frames.extend(team_top_frames)

    team_all_zones_top_scorers = pd.concat(team_top_frames).sort_values(
        ["zone", "total_points"], ascending=[True, False]
    )

    team_all_zones_top_scorers.to_csv(
        f"zone_top_scorers/{team.replace(' ', '_')}_zone_top_scorers.csv", index=False
    )


# Save all teams' zone top scorers.
# If no shot was classified at all, fall back to an empty frame that still has
# the expected columns so the sort and the CSV header remain valid.
all_teams_zones_top_scorers = (
    pd.concat(all_top_frames) if all_top_frames else zone_stats.iloc[0:0]
).sort_values(["team", "zone", "total_points"], ascending=[True, True, False])

all_teams_zones_top_scorers.to_csv(
    "zone_top_scorers/all_teams_zone_top_scorers.csv", index=False
)


# Save top 3 scorers by zone regardless of team.

# Aggregate each player's shots per zone across every team they appear with.
player_zone_stats = zone_df.groupby(["player", "zone"], as_index=False).agg(
    total_points=("points", "sum"),
    total_attempts=("made", "count"),
    made_shots=("made", "sum"),
)

# 0/0 (no counted attempts) yields NaN; report that as 0.
player_zone_stats["fg_percentage"] = (
    player_zone_stats["made_shots"] / player_zone_stats["total_attempts"]
).fillna(0)


# Every team a player name appears with, as one sorted, comma-separated string.
# Players are matched by name only.
player_teams = zone_df.groupby(["player"], as_index=False)["team"].agg(
    lambda x: ", ".join(sorted(set(x)))
)


# Merge to add team info.
player_zone_stats = pd.merge(player_zone_stats, player_teams, on="player")


# Top 3 per zone, collected in a list and concatenated once instead of growing
# a DataFrame with pd.concat on every iteration.
report_columns = ["zone", "player", "team", "total_points"]

overall_frames = []

for zone in zones:
    zone_data = player_zone_stats[player_zone_stats["zone"] == zone]

    if zone_data.empty:
        continue

    overall_frames.append(zone_data.nlargest(3, "total_points")[report_columns])


# If nothing was classified, fall back to an empty frame that still has the
# report columns so the sort and the CSV header remain valid.
overall_top_scorers = (
    pd.concat(overall_frames)
    if overall_frames
    else player_zone_stats.iloc[0:0][report_columns]
).sort_values(["zone", "total_points"], ascending=[True, False])

overall_top_scorers.to_csv("zone_top_scorers/overall_zone_top_scorers.csv", index=False)


In [None]:
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
from mplbasketball import Court

ax: Axes

# Court styling: light beige (wood-like) surface with dark gray lines.
court_color = "#f4e4c1"
line_color = "#555555"

fig, ax = plt.subplots(figsize=(18, 12))

# Draw the court in the same origin/orientation the shot coordinates use.
court = Court(court_type="nba", origin=ORIGIN, units="ft")
court.draw(
    ax,
    court_color=court_color,
    line_color=line_color,
    line_width=0.3,
    orientation=ORIENTATION,
    showaxis=True,
)  # type: ignore

# One small dot per shot, drawn above the court: green = made, red = missed.
scatter_plot = ax.scatter(
    x=df_transformed["x"],
    y=df_transformed["y"],
    s=10,
    c=df_transformed["r"].map({1: "green", 0: "red"}),
    zorder=2,
)

# NOTE(review): the title names a single team and a 2022+ range, but
# df_transformed is not filtered by team or season in the cells shown here -
# confirm whether a filter is missing or the title should change.
ax.set_title(
    label="Scarborough Shooting Stars Career Shot Chart (2022-Current)",
    loc="center",
    fontsize=14,
    fontweight="bold",
    fontfamily="sans-serif",
    color="#4B5320",
)

In [None]:
# Replace the raw scatter layer with hexbin layers. The guard keeps the cell
# from raising when it is re-run after the scatter has already been removed.
# NOTE: re-running still adds another set of hexbin layers to the same axes.
if scatter_plot.axes is not None:
    scatter_plot.remove()


# Hexbin resolution per shot type, as (hexagons along x, hexagons along y).
# A larger gridsize means more, and therefore smaller, hexagons.
shot_gridsizes = {
    "2pt": (80, 70),
    "3pt": (100, 80),
}

# Define colormap
cmap = "coolwarm"


# Apply hexbin separately for 2PT and 3PT shots
for action_type, gridsize in shot_gridsizes.items():
    subset = df_transformed[
        df_transformed["actionType"] == action_type
    ]  # Filter shots by action type

    # Nothing to bin for this shot type.
    if subset.empty:
        continue

    ax.hexbin(
        subset["x"],
        subset["y"],
        C=subset["r"],  # FG% per hex (1 for makes, 0 for misses)
        gridsize=gridsize,
        cmap=cmap,
        reduce_C_function=np.mean,  # Compute FG% in each hex
        mincnt=1,  # Only show bins with at least 1 attempt
        # Pin the colour range to 0-1 so both layers share one FG% scale;
        # otherwise each call scales to its own min/max.
        vmin=0,
        vmax=1,
        zorder=1,
    )

fig