<a href="https://colab.research.google.com/github/rossl18/rossl18.github.io/blob/main/BasketballAnimations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Dataset: https://www.kaggle.com/datasets/robertsunderhaft/nba-playoffs
# Import necessary libraries
import pandas as pd  # For data manipulation
import plotly.express as px  # For plots

# Load the playoff stats, keep only the columns analysed in this cell, and
# discard every row that has a missing value in any of them so the plots
# below only see complete rows
df = pd.read_csv('sample_data/playoffStats.csv')[
    ['season', 'fg3a_per_g', 'pts_per_g', 'fta_per_g', 'efg_pct']
].dropna()

# Function to create an animated histogram for a given column over multiple seasons
def plot_animated_histogram(data, x_column, title):
    """
    Display an animated histogram of one column, with one frame per season.

    Parameters:
    data: pandas DataFrame containing a 'season' column and x_column
    x_column: str, the column in the DataFrame to plot on the x-axis
    title: str, used both as the plot title and as the x-axis label

    The figure is shown with fig.show(); the function returns nothing.
    The caller's DataFrame is not modified.
    """
    # Plotly Express builds animation frames in the order the frame values
    # first appear in the data, so order the rows by season to make the
    # slider advance chronologically. sort_values returns a new frame, and
    # the stable sort keeps the original row order within each season.
    ordered = data.sort_values('season', kind='stable')

    fig = px.histogram(
        ordered,
        x=x_column,
        animation_frame="season",  # One frame per distinct season
        # Pin the x-axis to the full range of the column so that every
        # frame is drawn on the same scale
        range_x=[ordered[x_column].min(), ordered[x_column].max()],
        nbins=30,
        title=title,
    )

    # The plot title doubles as the x-axis label
    fig.update_layout(yaxis_title='Count', xaxis_title=title)

    # Display the plot
    fig.show()

# Create one animated histogram per statistic: (column name, display title)
histogram_specs = (
    ('fg3a_per_g', '3PT Attempts per Game'),         # 3-point attempts per game
    ('pts_per_g', 'Total Points per Game'),          # points per game
    ('fta_per_g', 'Free Throw Attempts per Game'),   # free throw attempts per game
    ('efg_pct', 'Effective FG %'),                   # effective field goal percentage
)
for stat_column, stat_title in histogram_specs:
    plot_animated_histogram(df, stat_column, stat_title)



In [None]:
import pandas as pd
import plotly.express as px

# Load and preprocess the data
def load_data(file_path):
    """
    Read the stats CSV and return only the complete rows of the columns
    used by the 3-point scatter plot.

    Parameters:
    file_path: str, path to the CSV file

    Returns:
    A pandas DataFrame holding 'season', 'fg3a_per_g', 'fg3_pct', 'ft_pct'
    and 'fg3_per_g', with every row that has a missing value in any of
    those columns removed. The original row index labels are kept.
    """
    wanted = ['season', 'fg3a_per_g', 'fg3_pct', 'ft_pct', 'fg3_per_g']
    stats = pd.read_csv(file_path)
    # Narrow to the wanted columns first so that gaps in unrelated columns
    # do not cause rows to be discarded
    return stats[wanted].dropna()

# Function to create an animated scatter plot
def create_animated_scatter(df, x_column, y_column, size_column, color_column, title, labels=None):
    """
    Creates an animated scatter plot (one frame per season) with size and
    color dimensions.

    Parameters:
    df: pandas DataFrame, input data for plotting; must contain a 'season' column
    x_column: str, the column to be plotted on the x-axis
    y_column: str, the column to be plotted on the y-axis
    size_column: str, the column used to size the points
    color_column: str, the column used to color the points
    title: str, title of the plot
    labels: dict or None, optional mapping of column name -> display label.
        When omitted, the notebook's original labels are used
        ('3PT Attempts per Game', '3PT Made per Game', 'Free Throw %',
        '3PT %' for the x, y, size and color columns respectively).

    Returns:
    A Plotly figure object. The figure is not displayed here, and the
    caller's DataFrame is not modified.
    """
    if labels is None:
        # Default labels describe the columns this notebook plots; pass
        # `labels` explicitly when plotting other statistics.
        labels = {
            x_column: '3PT Attempts per Game',
            y_column: '3PT Made per Game',
            size_column: 'Free Throw %',
            color_column: '3PT %',
        }

    # Plotly Express builds animation frames in the order the frame values
    # first appear in the data; sort by season so the slider advances
    # chronologically. The stable sort keeps row order within each season.
    ordered = df.sort_values('season', kind='stable')

    fig = px.scatter(
        ordered,
        x=x_column,
        y=y_column,
        animation_frame='season',  # Animate by 'season'
        # NOTE(review): animation_group is meant to identify the same object
        # across frames; grouping by a statistic is kept from the original
        # because the frames used in this notebook carry no player identifier.
        animation_group=y_column,
        size=size_column,  # Size points based on 'size_column'
        color=color_column,  # Color points based on 'color_column'
        hover_name=y_column,  # Hover tooltip shows 'y_column'
        # Fix both axes to the full data range so frames share one scale
        range_x=[ordered[x_column].min(), ordered[x_column].max()],
        range_y=[ordered[y_column].min(), ordered[y_column].max()],
        color_continuous_scale=px.colors.sequential.Plasma,
        labels=labels,
    )

    # Update layout with a custom title
    fig.update_layout(title=title)
    return fig

# Load the cleaned playoff stats
df = load_data('sample_data/playoffStats.csv')

# Column roles for the animated scatter plot
scatter_columns = {
    'x_column': 'fg3a_per_g',
    'y_column': 'fg3_per_g',
    'size_column': 'ft_pct',
    'color_column': 'fg3_pct',
}

# Build the figure, then display it
fig = create_animated_scatter(
    df,
    title='NBA Playoff Player Performance Over Years',
    **scatter_columns,
)
fig.show()



In [None]:
import pandas as pd
import plotly.express as px

# Load and preprocess the data
def load_and_clean_data(file_path, columns, drop_columns):
    """
    Read a CSV, keep the requested columns, report how many values are
    missing in each, and drop the rows that are incomplete.

    Parameters:
    file_path: str, path to the CSV file
    columns: list, columns to keep for analysis
    drop_columns: list, columns in which a missing value causes the row to be dropped

    Returns:
    A pandas DataFrame restricted to `columns`, with no missing values in
    any of `drop_columns`. Columns outside `drop_columns` may still hold NaN.
    """
    selected = pd.read_csv(file_path)[columns]

    # Report the per-column NaN counts before any row is removed
    print("Missing values per column before cleaning:\n", selected.isna().sum())

    # Only the columns named in drop_columns decide whether a row survives
    return selected.dropna(subset=drop_columns)

# Function to create an animated scatter plot with position-based coloring
def create_position_based_scatter(df, x_column, y_column, size_column, color_column, title, labels=None):
    """
    Creates an animated scatter plot (one frame per season) colored by a
    categorical column, which in this notebook is the player position.

    Parameters:
    df: pandas DataFrame, input data for plotting; must contain a 'season' column
    x_column: str, the column to be plotted on the x-axis
    y_column: str, the column to be plotted on the y-axis
    size_column: str, the column used to size the points
    color_column: str, the column used to color the points (in this case, player position)
    title: str, title of the plot
    labels: dict or None, optional mapping of column name -> display label.
        When omitted, the notebook's original labels are used
        ('3PT Attempts per Game', '3PT%', 'Free Throw %', 'Position' for
        the x, y, size and color columns respectively).

    Returns:
    A Plotly figure object. The figure is not displayed here, and the
    caller's DataFrame is not modified.
    """
    if labels is None:
        # Default labels describe the columns this notebook plots; pass
        # `labels` explicitly when plotting other statistics.
        labels = {
            x_column: '3PT Attempts per Game',
            y_column: '3PT%',
            size_column: 'Free Throw %',
            color_column: 'Position',
        }

    # Plotly Express builds animation frames in the order the frame values
    # first appear in the data; sort by season so the slider advances
    # chronologically. The stable sort keeps row order within each season.
    ordered = df.sort_values('season', kind='stable')

    fig = px.scatter(
        ordered,
        x=x_column,
        y=y_column,
        animation_frame='season',  # Animate over seasons
        # NOTE(review): animation_group is meant to identify the same object
        # across frames; grouping by the y statistic is kept from the original.
        animation_group=y_column,
        size=size_column,  # Size points based on 'size_column'
        color=color_column,  # Color points based on 'color_column'
        hover_name=y_column,  # Hover tooltip shows 'y_column'
        # Fix both axes to the full data range so frames share one scale
        range_x=[ordered[x_column].min(), ordered[x_column].max()],
        range_y=[ordered[y_column].min(), ordered[y_column].max()],
        # Fixed colors for the five position codes used in this notebook
        color_discrete_map={'C': 'blue', 'PG': 'red', 'SG': 'green', 'SF': 'purple', 'PF': 'orange'},
        labels=labels,
    )

    # Update the layout with a title
    fig.update_layout(title=title)

    return fig

# Load and clean the data
import os

columns = ['season', 'fg3a_per_g', 'fg3_pct', 'ft_pct', 'fg3_per_g', 'pos']  # Columns to select
drop_columns = ['fg3a_per_g', 'fg3_pct', 'ft_pct', 'fg3_per_g', 'pos']  # Columns to drop if NaN

# The recorded run of this cell failed with FileNotFoundError for
# 'playoffStats.csv', while other cells in this notebook read
# 'sample_data/playoffStats.csv'. Use whichever location exists; if neither
# does, fall back to the original path so the error still names it.
csv_candidates = ('playoffStats.csv', 'sample_data/playoffStats.csv')
csv_path = next((p for p in csv_candidates if os.path.exists(p)), csv_candidates[0])
df = load_and_clean_data(csv_path, columns, drop_columns)

# Create the animated scatter plot
fig = create_position_based_scatter(
    df,
    x_column='fg3a_per_g',  # X-axis: 3-point attempts per game
    y_column='fg3_pct',  # Y-axis: 3-point percentage
    size_column='ft_pct',  # Size points by free throw percentage
    color_column='pos',  # Color points by player position
    title='NBA Playoff Player Performance Over Years by Position'  # Title of the plot
)

# Show the plot
fig.show()


FileNotFoundError: [Errno 2] No such file or directory: 'playoffStats.csv'

In [1]:
import pandas as pd
import plotly.express as px

# Load the dataset and prepare it
import os

# The recorded run of this cell failed with FileNotFoundError for
# 'sample_data/playoffStats.csv', while other cells in this notebook read
# 'playoffStats.csv' from the working directory. Use whichever location
# exists; if neither does, fall back to the original path so the error
# still names it.
csv_candidates = ('sample_data/playoffStats.csv', 'playoffStats.csv')
csv_path = next((p for p in csv_candidates if os.path.exists(p)), csv_candidates[0])

df = pd.read_csv(csv_path)
df = df[['season', 'fg3a_per_g', 'fg3_pct', 'age', 'pos']]  # Select relevant columns

# Drop any rows with missing data in the important columns
df = df.dropna(subset=['fg3a_per_g', 'fg3_pct', 'age', 'pos'])

# Sort by season so the animation frames are built in season order
df = df.sort_values('season', ascending=True)

# Function to create and display an animated scatter plot, with one frame per season
def create_position_based_scatter(df, x_column, y_column, size_column, color_column, title):
    """
    Create, style and display an animated scatter plot with one frame per season.

    Parameters:
    - df: pandas DataFrame; must contain a 'season' column
    - x_column: column name for the x-axis
    - y_column: column name for the y-axis
    - size_column: column name for sizing the points
    - color_column: column name for coloring the points
    - title: title of the plot

    Returns:
    The Plotly figure object, after it has been displayed with fig.show().
    Returning it lets `fig = create_position_based_scatter(...)` bind a
    usable figure rather than None.
    """
    fig = px.scatter(
        df,
        x=x_column,  # Set x-axis using the provided x_column
        y=y_column,  # Set y-axis using the provided y_column
        animation_frame='season',  # Animate over the seasons
        animation_group=size_column,  # Rows sharing a size_column value are matched across frames
        size=size_column,  # Use the size_column to size the points
        color=color_column,  # Color the points based on the provided color_column
        hover_name=color_column,  # Hover title shows the color_column value
        # Fix both axes to the full data range so frames share one scale
        range_x=[df[x_column].min(), df[x_column].max()],
        range_y=[df[y_column].min(), df[y_column].max()],
        # Identity mapping: each column is labelled with its own name
        labels={x_column: x_column, y_column: y_column, color_column: color_column},
    )

    # Customize the layout of the plot
    fig.update_layout(
        title=title,  # Set the plot title
        paper_bgcolor='rgba(245, 246, 250, 1)',  # Background of the whole figure
        plot_bgcolor='rgba(245, 246, 250, 1)',   # Background of the plotting area
        font=dict(family="Arial, sans-serif", size=12, color="RebeccaPurple")  # Customize font
    )

    # Display the plot, then hand the figure back to the caller
    fig.show()
    return fig

# Example usage: 3-point attempts against 3-point percentage, with the
# percentage also driving point size and the player position driving color
scatter_args = {
    'x_column': 'fg3a_per_g',   # X-axis: 3-point attempts per game
    'y_column': 'fg3_pct',      # Y-axis: 3-point percentage
    'size_column': 'fg3_pct',   # Size points based on 3-point percentage
    'color_column': 'pos',      # Color points based on player position
    'title': 'NBA Playoff Player 3-Point Performance Over Years',
}
fig = create_position_based_scatter(df, **scatter_args)



FileNotFoundError: [Errno 2] No such file or directory: 'sample_data/playoffStats.csv'

In [4]:
import pandas as pd
import plotly.express as px

# Read the stats, keep the plotted columns, drop rows that are missing any
# of the plotted values, and order the rows by season so the animation
# frames are built in season order
plot_columns = ['season', 'fg3a_per_g', 'fg3_pct', 'age', 'pos']
required_columns = ['fg3a_per_g', 'fg3_pct', 'age', 'pos']
df = (
    pd.read_csv('playoffStats.csv')[plot_columns]
    .dropna(subset=required_columns)
    .sort_values('season', ascending=True)
)

# Column roles and title for the scatter plot
scatter_args = {
    'x_column': 'fg3a_per_g',   # X-axis: 3-point attempts per game
    'y_column': 'fg3_pct',      # Y-axis: 3-point percentage
    'size_column': 'fg3_pct',   # Size points based on 3-point percentage
    'color_column': 'pos',      # Color points based on player position
    'title': 'NBA Playoff Player 3-Point Performance Over Years',
}
fig = create_position_based_scatter(df, **scatter_args)
