This is the Python file for our plots.


The first one is for shots by position. 

In [16]:
import altair as alt
import pandas as pd


# Load the CSV file into a Pandas DataFrame
file_path = 'nba-late-game-shots.csv'
df = pd.read_csv(file_path)

# Filter the data for late-game shots
filtered_data = df[(df["QUARTER"] >= 4) & (df["MINS_LEFT"] <= 2)]

# Define the selection for interactivity with a cleaner label
position_selection = alt.selection_single(
    fields=["POSITION"],
    bind=alt.binding_select(
        options=filtered_data["POSITION"].unique(),
        name="Position: "  # Set the label explicitly
    )
)

# Create the chart
chart = alt.Chart(filtered_data).mark_circle(size=100).encode(
    x=alt.X("LOC_X:Q", scale=alt.Scale(domain=(-25, 25)), title="X Location"),
    y=alt.Y("LOC_Y:Q", scale=alt.Scale(domain=(0, 85)), title="Y Location"),
    color=alt.condition(
        alt.datum.EVENT_TYPE == "Made Shot",
        alt.value("green"),  # Made shots
        alt.value("red")     # Missed shots
    ),
    tooltip=["PLAYER_NAME", "EVENT_TYPE", "POSITION", "LOC_X", "LOC_Y"]
).properties(
    width=600,
    height=600,
    title="Shot Locations for Selected Position"
).add_selection(
    position_selection
).transform_filter(
    position_selection
)

# Save the chart to an HTML file
output_file_path = 'shot_locations_chart.html'
chart.save(output_file_path)

print(f"Chart has been saved to {output_file_path}")


  position_selection = alt.selection_single(
  chart = alt.Chart(filtered_data).mark_circle(size=100).encode(


Chart has been saved to shot_locations_chart.html


In [17]:
data = pd.read_csv('nba-late-game-shots.csv')
time_slider = alt.binding_range(min=0, max=60, step=1, name='Seconds Left')
time_selection = alt.param(value=60, bind=time_slider)

shot_filter = alt.binding_select(options=['Both', 'Made', 'Missed'], name='Shot Outcome')
shot_selection = alt.param(value='Both', bind=shot_filter)

distance_time = alt.Chart(data).mark_circle().add_params(
    time_selection,
    shot_selection
).encode(
    alt.X('LOC_X:Q', title='Horizontal Distance from Basket',
          scale=alt.Scale(domain=[-25, 25])),
    alt.Y('LOC_Y:Q', title='Distance From Baseline',
          scale=alt.Scale(domain=[0, 94])),
    alt.Color(
        'SHOT_MADE:N', 
        title='Shot Made',
        scale=alt.Scale(domain=[True, False], range=['red', 'blue'])
    ),
    alt.Tooltip(['PLAYER_NAME:N', 'GAME_DATE:N', 'SECS_LEFT:Q']),
    alt.OpacityValue(0.4)
).transform_filter(
    (alt.datum.SECS_LEFT <= time_selection) &
    ((shot_selection == 'Both') | 
     ((shot_selection == 'Made') & (alt.datum.SHOT_MADE == True)) |
     ((shot_selection == 'Missed') & (alt.datum.SHOT_MADE == False)))
).properties(
    width=250,
    height=470
).configure_axis(
    grid=False
)

shot_type_data = data.groupby('ACTION_TYPE').agg(
    attempts=('SHOT_MADE', 'count'),
    made=('SHOT_MADE', 'sum')
).reset_index()

shot_type_data['shooting_percentage'] = (shot_type_data['made'] / shot_type_data['attempts']) * 100

shot_types = alt.Chart(shot_type_data).mark_bar().encode(
    x=alt.X('ACTION_TYPE:N', title='Shot Type', sort='-y'),
    y=alt.Y('shooting_percentage:Q', title='Shooting Percentage', scale=alt.Scale(domain=[0, 100])),
    color=alt.Color('attempts:Q', scale=alt.Scale(scheme='reds'), title='Attempts'),
    tooltip=['ACTION_TYPE', 'shooting_percentage', 'attempts', 'made']
).properties(
    title='Shooting Percentage by Shot Type (Color by Attempts)',
    width=600,
    height=400
)

distance_time.save('distance_time.html')
shot_types.save('shot_types.html')


Dataset 4

In [18]:
import matplotlib.pyplot as plt
# Load the dataset
nba_late_game_shots = pd.read_csv('nba-late-game-shots.csv')

# Calculate total shots and successful shots by team
team_late_game_stats = nba_late_game_shots.groupby('TEAM_NAME').agg(
    total_shots=('SHOT_MADE', 'count'),
    successful_shots=('SHOT_MADE', 'sum')
).reset_index()

# Calculate shooting percentage
team_late_game_stats['shooting_percentage'] = (
    team_late_game_stats['successful_shots'] / team_late_game_stats['total_shots']
) * 100

# Plotting the data
plt.figure(figsize=(12, 8))
plt.scatter(team_late_game_stats['total_shots'], team_late_game_stats['shooting_percentage'], alpha=0.7)
plt.title("NBA Late-Game Shots: Total Shots vs. Shooting Percentage", fontsize=14)
plt.xlabel("Total Shots", fontsize=12)
plt.ylabel("Shooting Percentage (%)", fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)

# Annotate team names
for _, row in team_late_game_stats.iterrows():
    plt.text(row['total_shots'], row['shooting_percentage'], row['TEAM_NAME'], fontsize=8, alpha=0.7)

plt.show()
plt.savefig('nba_late_game_shots_by_team.png')


ModuleNotFoundError: No module named 'matplotlib'