In [1]:
import requests
import pandas as pd

# Accumulates one DataFrame per (season, season type) request
dfs = []

# Seconds to wait on the ESPN API before giving up, so a stalled connection
# cannot hang the cell indefinitely
REQUEST_TIMEOUT = 30

# Loop through the seasons 2019 through 2024 (the range end is exclusive)
for year in range(2019, 2025):
    # Normally request both season types: 2 is labelled 'Regular Season' and
    # 3 'Postseason' further down. For 2024 only season type 2 is requested.
    season_types = [2] if year == 2024 else [2, 3]

    for season_type in season_types:
        # API URL for the current year and season type
        url = f"https://site.web.api.espn.com/apis/common/v3/sports/basketball/nba/statistics/byathlete?region=us&lang=en&contentorigin=espn&isqualified=true&page=1&limit=350&sort=offensive.avgPoints:desc&season={year}&seasontype={season_type}"

        # Fetch the data from the API
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        try:
            data = response.json()
        except ValueError:
            # Body was not valid JSON (e.g. an HTML error page): skip this
            # combination instead of aborting the whole download.
            print(f"Could not parse response for season {year} and season type {season_type} (HTTP {response.status_code}).")
            continue

        # Skip combinations for which the API returned no athlete list
        if 'athletes' not in data:
            print(f"No athlete data available for season {year} and season type {season_type}.")
            continue
        athletes = data['athletes']

        # Category name -> list of human-readable stat names, used as column labels
        category_map = {
            category['name']: category['displayNames']
            for category in data.get('categories', [])
        }

        # Player display name -> {column label: value}
        # (two athletes sharing a display name would overwrite each other here)
        athlete_map = {}

        for athlete in athletes:
            athlete_info = athlete['athlete']
            athlete_stats = {
                'Position': athlete_info.get('position', {}).get('abbreviation', 'N/A')
            }

            for category in athlete['categories']:
                category_name = category['name']
                display_names = category_map.get(category_name, ['Unknown'])
                for i, total in enumerate(category['totals']):
                    # Use the API-provided label when one exists for this slot;
                    # otherwise synthesise one rather than raising IndexError.
                    if i < len(display_names):
                        label = display_names[i]
                    else:
                        label = f"{category_name}_{i}"
                    athlete_stats[label] = total

            athlete_map[athlete_info['displayName']] = athlete_stats

        # Create a DataFrame for the current year and season type
        # (rows are indexed by player display name at this point)
        df = pd.DataFrame.from_dict(athlete_map, orient='index')
        df['Year'] = year
        df['SeasonType'] = 'Regular Season' if season_type == 2 else 'Postseason'

        dfs.append(df)

if not dfs:
    # pd.concat raises an opaque ValueError on an empty list; fail with context instead
    raise ValueError("No athlete data was retrieved for any season; cannot build final_df.")

# Concatenate all DataFrames into a single DataFrame.
# NOTE(review): ignore_index=True replaces the player-name index with 0..n-1, so
# final_df carries no player identifier - confirm that this is intended.
final_df = pd.concat(dfs, ignore_index=True)


Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 1.0.4 
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 5
Session ID: dab69c04-835e-49df-a796-590d67e1759d
Applying the following default arguments:
--glue_kernel_version 1.0.4
--enable-glue-datacatalog true
Waiting for session dab69c04-835e-49df-a796-590d67e1759d to get into ready status...
Session dab69c04-835e-49df-a796-590d67e1759d has been created.



In [2]:
# Inspect the column labels of the combined stats frame
final_df.columns

Index(['Position', 'Games Played', 'Minutes Per Game', 'Fouls Per Game',
       'Flagrant Fouls', 'Technical Fouls', 'Ejections', 'Double Double',
       'Triple Double', 'Minutes', 'Rebounds', 'Fouls', 'Rebounds Per Game',
       'Points Per Game', 'Average Field Goals Made',
       'Average Field Goals Attempted', 'Field Goal Percentage',
       'Average 3-Point Field Goals Made',
       'Average 3-Point Field Goals Attempted',
       '3-Point Field Goal Percentage', 'Average Free Throws Made',
       'Average Free Throws Attempted', 'Free Throw Percentage',
       'Assists Per Game', 'Turnovers Per Game', 'Points', 'Field Goals Made',
       'Field Goals Attempted', '3-Point Field Goals Made',
       '3-Point Field Goals Attempted', 'Free Throws Made',
       'Free Throws Attempted', 'Assists', 'Turnovers', 'Steals Per Game',
       'Blocks Per Game', 'Steals', 'Blocks', 'Year', 'SeasonType'],
      dtype='object')


In [3]:
import pandas as pd
import boto3
from io import StringIO
from datetime import datetime

# Destination: fixed bucket, with today's date as the key prefix ("folder")
bucket_name = 'nbagraphssamsean'
date_str = datetime.now().strftime('%Y-%m-%d')
file_name = f"{date_str}/daily_stats.csv"

# Serialise the combined stats frame to CSV text held in memory
csv_buffer = StringIO()
final_df.to_csv(csv_buffer)
csv_text = csv_buffer.getvalue()

# Write the CSV text to S3 as a single object
s3_resource = boto3.resource('s3')
target_object = s3_resource.Object(bucket_name, file_name)
target_object.put(Body=csv_text)

print(f"File successfully uploaded to s3://{bucket_name}/{file_name}")

File successfully uploaded to s3://nbagraphssamsean/2024-03-13/daily_stats.csv


In [4]:
from io import BytesIO
import boto3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Convert 'Field Goal Percentage' to numeric on the shared frame (the seasonal
# plot cell reads this column too); unparseable values become NaN.
final_df['Field Goal Percentage'] = pd.to_numeric(final_df['Field Goal Percentage'], errors='coerce')

# Define the position names mapping
position_names = {
    'PG': 'Point Guard',
    'SG': 'Shooting Guard',
    'SF': 'Small Forward',
    'PF': 'Power Forward',
    'C': 'Center',
    'F': 'Forward',
    'G': 'Guard'
}

# Map the 'Position' abbreviation to 'Position Full Name'
# (abbreviations missing from the mapping become NaN)
final_df['Position Full Name'] = final_df['Position'].map(position_names)

# Define the desired order for the positions
position_order = ['Point Guard', 'Shooting Guard', 'Small Forward', 'Power Forward', 'Center', 'Guard', 'Forward']

# Convert the 'Position Full Name' column to a categorical type with the specified order
final_df['Position Full Name'] = pd.Categorical(final_df['Position Full Name'], categories=position_order, ordered=True)

# Exclude rows lacking this metric from a plot-local frame only. Dropping them
# from final_df in place would permanently shrink the data every later cell
# sees and make the results depend on which cells were run before.
plot_df = final_df.dropna(subset=['Field Goal Percentage'])

# One box-plot panel per 'Year', three panels per row
g = sns.FacetGrid(plot_df, col='Year', col_wrap=3, sharex=False, sharey=True, height=5, aspect=1.5)
g.map_dataframe(sns.boxplot, x='Position Full Name', y='Field Goal Percentage', palette='Set2', order=position_order)

# Additional formatting to enhance readability
g.set_titles(col_template='Year: {col_name}')
g.set_axis_labels('Position', 'Field Goal Percentage')
g.set_xticklabels(rotation=45)

# Adjust the spacing between plots
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

s3 = boto3.client('s3')
object_name = f'{date_str}/field_goal_percentage_by_position_yearly.png'

# Render the grid's own figure (not whichever figure happens to be current)
# into memory and upload it; the buffer is closed even if the upload raises.
with BytesIO() as buf:
    g.fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind so the upload reads from the start of the PNG
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")


Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/field_goal_percentage_by_position_yearly.png


In [5]:
# Convert '3-Point Field Goal Percentage' to numeric on the shared frame (the
# seasonal plot cell reads this column too); unparseable values become NaN.
final_df['3-Point Field Goal Percentage'] = pd.to_numeric(final_df['3-Point Field Goal Percentage'], errors='coerce')

# Exclude rows lacking this metric from a plot-local frame only. Dropping them
# from final_df in place would permanently remove those players from every
# later plot, including plots of unrelated metrics.
plot_df = final_df.dropna(subset=['3-Point Field Goal Percentage'])

# One box-plot panel per 'Year', three panels per row
g = sns.FacetGrid(plot_df, col='Year', col_wrap=3, sharex=False, sharey=True, height=5, aspect=1.5)
g.map_dataframe(sns.boxplot, x='Position Full Name', y='3-Point Field Goal Percentage', palette='Set2', order=position_order)

# Additional formatting to enhance readability
g.set_titles(col_template='Year: {col_name}')
g.set_axis_labels('Position', '3-Point Field Goal Percentage')
g.set_xticklabels(rotation=45)

# Adjust the spacing between plots
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

s3 = boto3.client('s3')
# NOTE(review): this key contains a space ("Field Goal"), unlike the other
# object keys in this notebook. Kept byte-identical so anything reading the
# existing key keeps working - confirm before renaming.
object_name = f'{date_str}/3-Point_Field Goal_Percentage_by_position_yearly.png'

# Render the grid's own figure (not whichever figure happens to be current)
# into memory and upload it; the buffer is closed even if the upload raises.
with BytesIO() as buf:
    g.fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind so the upload reads from the start of the PNG
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/3-Point_Field Goal_Percentage_by_position_yearly.png


In [6]:
# Convert 'Free Throw Percentage' to numeric on the shared frame (the seasonal
# plot cell reads this column too); unparseable values become NaN.
final_df['Free Throw Percentage'] = pd.to_numeric(final_df['Free Throw Percentage'], errors='coerce')

# Exclude rows lacking this metric from a plot-local frame only. Dropping them
# from final_df in place would permanently remove those players from every
# later plot, including plots of unrelated metrics.
plot_df = final_df.dropna(subset=['Free Throw Percentage'])

# One box-plot panel per 'Year', three panels per row
g = sns.FacetGrid(plot_df, col='Year', col_wrap=3, sharex=False, sharey=True, height=5, aspect=1.5)
g.map_dataframe(sns.boxplot, x='Position Full Name', y='Free Throw Percentage', palette='Set2', order=position_order)

# Additional formatting to enhance readability
g.set_titles(col_template='Year: {col_name}')
g.set_axis_labels('Position', 'Free Throw Percentage')
g.set_xticklabels(rotation=45)

# Adjust the spacing between plots
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

s3 = boto3.client('s3')
# NOTE(review): this key contains a space ("Throw Percentage"), unlike the
# other object keys in this notebook. Kept byte-identical so anything reading
# the existing key keeps working - confirm before renaming.
object_name = f'{date_str}/Free_Throw Percentage_by_position_yearly.png'

# Render the grid's own figure (not whichever figure happens to be current)
# into memory and upload it; the buffer is closed even if the upload raises.
with BytesIO() as buf:
    g.fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind so the upload reads from the start of the PNG
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Free_Throw Percentage_by_position_yearly.png


In [7]:
# Convert 'Steals Per Game' to numeric on the shared frame (the seasonal plot
# cell reads this column too); unparseable values become NaN.
final_df['Steals Per Game'] = pd.to_numeric(final_df['Steals Per Game'], errors='coerce')

# Exclude rows lacking this metric from a plot-local frame only. Dropping them
# from final_df in place would permanently remove those players from every
# later plot, including plots of unrelated metrics.
plot_df = final_df.dropna(subset=['Steals Per Game'])

# One box-plot panel per 'Year', three panels per row
g = sns.FacetGrid(plot_df, col='Year', col_wrap=3, sharex=False, sharey=True, height=5, aspect=1.5)
g.map_dataframe(sns.boxplot, x='Position Full Name', y='Steals Per Game', palette='Set2', order=position_order)

# Additional formatting to enhance readability
g.set_titles(col_template='Year: {col_name}')
g.set_axis_labels('Position', 'Steals Per Game')
g.set_xticklabels(rotation=45)

# Adjust the spacing between plots
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

s3 = boto3.client('s3')
object_name = f'{date_str}/Steals_Per_Game_by_position_yearly.png'

# Render the grid's own figure (not whichever figure happens to be current)
# into memory and upload it; the buffer is closed even if the upload raises.
with BytesIO() as buf:
    g.fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind so the upload reads from the start of the PNG
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Steals_Per_Game_by_position_yearly.png


In [8]:
# Convert 'Blocks Per Game' to numeric on the shared frame (the seasonal plot
# cell reads this column too); unparseable values become NaN.
final_df['Blocks Per Game'] = pd.to_numeric(final_df['Blocks Per Game'], errors='coerce')

# Exclude rows lacking this metric from a plot-local frame only. Dropping them
# from final_df in place would permanently remove those players from every
# later plot, including plots of unrelated metrics.
plot_df = final_df.dropna(subset=['Blocks Per Game'])

# One box-plot panel per 'Year', three panels per row
g = sns.FacetGrid(plot_df, col='Year', col_wrap=3, sharex=False, sharey=True, height=5, aspect=1.5)
g.map_dataframe(sns.boxplot, x='Position Full Name', y='Blocks Per Game', palette='Set2', order=position_order)

# Additional formatting to enhance readability
g.set_titles(col_template='Year: {col_name}')
g.set_axis_labels('Position', 'Blocks Per Game')
g.set_xticklabels(rotation=45)

# Adjust the spacing between plots
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

s3 = boto3.client('s3')
object_name = f'{date_str}/Blocks_Per_Game_by_position_yearly.png'

# Render the grid's own figure (not whichever figure happens to be current)
# into memory and upload it; the buffer is closed even if the upload raises.
with BytesIO() as buf:
    g.fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind so the upload reads from the start of the PNG
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Blocks_Per_Game_by_position_yearly.png


In [9]:
# List the distinct season-type labels present in the combined frame
final_df['SeasonType'].unique()

array(['Regular Season', 'Postseason'], dtype=object)


In [10]:
# Box plots of field goal percentage per position, one panel per 'SeasonType' value
g = sns.FacetGrid(
    final_df,
    col='SeasonType',
    col_wrap=2,
    sharex=False,
    sharey=True,
    height=5,
    aspect=1.5,
)
g.map_dataframe(
    sns.boxplot,
    x='Position Full Name',
    y='Field Goal Percentage',
    palette='Set2',
    order=position_order,
)

# Panel titles, axis labels and slanted tick labels for legibility
g.set_titles(col_template='{col_name}')
g.set_axis_labels('Position', 'Field Goal Percentage')
g.set_xticklabels(rotation=45)

# Extra padding between the panels
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Render the figure to an in-memory PNG and push it to the dated S3 prefix
s3 = boto3.client('s3')
object_name = f'{date_str}/field_goal_percentage_by_position_seasonal.png'
with BytesIO() as buf:
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind before the upload reads the buffer
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/field_goal_percentage_by_position_seasonal.png


In [11]:
# Box plots of 3-point field goal percentage per position, one panel per 'SeasonType' value
g = sns.FacetGrid(
    final_df,
    col='SeasonType',
    col_wrap=2,
    sharex=False,
    sharey=True,
    height=5,
    aspect=1.5,
)
g.map_dataframe(
    sns.boxplot,
    x='Position Full Name',
    y='3-Point Field Goal Percentage',
    palette='Set2',
    order=position_order,
)

# Panel titles, axis labels and slanted tick labels for legibility
g.set_titles(col_template='{col_name}')
g.set_axis_labels('Position', '3-Point Field Goal Percentage')
g.set_xticklabels(rotation=45)

# Extra padding between the panels
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Render the figure to an in-memory PNG and push it to the dated S3 prefix
s3 = boto3.client('s3')
object_name = f'{date_str}/3-Point_Field_Goal_Percentage_by_position_seasonal.png'
with BytesIO() as buf:
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind before the upload reads the buffer
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/3-Point_Field_Goal_Percentage_by_position_seasonal.png


In [12]:
# Box plots of steals per game per position, one panel per 'SeasonType' value
g = sns.FacetGrid(
    final_df,
    col='SeasonType',
    col_wrap=2,
    sharex=False,
    sharey=True,
    height=5,
    aspect=1.5,
)
g.map_dataframe(
    sns.boxplot,
    x='Position Full Name',
    y='Steals Per Game',
    palette='Set2',
    order=position_order,
)

# Panel titles, axis labels and slanted tick labels for legibility
g.set_titles(col_template='{col_name}')
g.set_axis_labels('Position', 'Steals Per Game')
g.set_xticklabels(rotation=45)

# Extra padding between the panels
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Render the figure to an in-memory PNG and push it to the dated S3 prefix
s3 = boto3.client('s3')
object_name = f'{date_str}/Steals_Per_Game_by_position_seasonal.png'
with BytesIO() as buf:
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind before the upload reads the buffer
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Steals_Per_Game_by_position_seasonal.png


In [13]:
# Box plots of free throw percentage per position, one panel per 'SeasonType' value
g = sns.FacetGrid(
    final_df,
    col='SeasonType',
    col_wrap=2,
    sharex=False,
    sharey=True,
    height=5,
    aspect=1.5,
)
g.map_dataframe(
    sns.boxplot,
    x='Position Full Name',
    y='Free Throw Percentage',
    palette='Set2',
    order=position_order,
)

# Panel titles, axis labels and slanted tick labels for legibility
g.set_titles(col_template='{col_name}')
g.set_axis_labels('Position', 'Free Throw Percentage')
g.set_xticklabels(rotation=45)

# Extra padding between the panels
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Render the figure to an in-memory PNG and push it to the dated S3 prefix
s3 = boto3.client('s3')
object_name = f'{date_str}/Free_Throw_Percentage_by_position_seasonal.png'
with BytesIO() as buf:
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind before the upload reads the buffer
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Free_Throw_Percentage_by_position_seasonal.png


In [14]:
# Box plots of blocks per game per position, one panel per 'SeasonType' value
g = sns.FacetGrid(
    final_df,
    col='SeasonType',
    col_wrap=2,
    sharex=False,
    sharey=True,
    height=5,
    aspect=1.5,
)
g.map_dataframe(
    sns.boxplot,
    x='Position Full Name',
    y='Blocks Per Game',
    palette='Set2',
    order=position_order,
)

# Panel titles, axis labels and slanted tick labels for legibility
g.set_titles(col_template='{col_name}')
g.set_axis_labels('Position', 'Blocks Per Game')
g.set_xticklabels(rotation=45)

# Extra padding between the panels
g.fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Render the figure to an in-memory PNG and push it to the dated S3 prefix
s3 = boto3.client('s3')
object_name = f'{date_str}/Blocks_Per_Game_by_position_seasonal.png'
with BytesIO() as buf:
    plt.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)  # rewind before the upload reads the buffer
    s3.upload_fileobj(buf, bucket_name, object_name)

print(f"Plot successfully uploaded to s3://{bucket_name}/{object_name}")

Plot successfully uploaded to s3://nbagraphssamsean/2024-03-13/Blocks_Per_Game_by_position_seasonal.png
