# Code To Find All The Injury Events

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# slice data set
# only identify players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# exclude those 'injuries' which are not relevant
# pd.read_csv converts the literal text 'n/a' to NaN by default, so the 'n/a'
# entry in the list can never match on its own; rows with a missing category
# are therefore excluded explicitly as well.
excluded_categories = ['healthy inactive','rest','sick','other','n/a']
is_excluded = injury_events_df['category'].isin(excluded_categories) | injury_events_df['category'].isna()
injury_events_df = injury_events_df[~is_excluded]

#------------------------Make plots-------------------------------------------

# count the injury events (rows) recorded for each year
data = injury_events_df.groupby(['Year']).size()

# one colour per bar: grey for every year, red for the last (highest) year.
# Built from len(data) so the highlight stays on the last bar whatever the
# number of years in the data set.
bar_colors = ['dimgray'] * (len(data) - 1) + ['red']

# create plot
ax = data.plot(kind='bar', stacked=True, figsize=(15,10), color=bar_colors)

# Set the x-axis label
ax.set_xlabel("Year", fontsize = 16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Injury Events", fontsize =16,weight='bold')

# Set the x-axis tick labels
ax.set_xticklabels(data.index,rotation = 0, fontsize = 16)

#plot title
ax.set_title('Injury Events', fontsize=24, weight='bold')
#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
injury_events_figure = "injury_events_plot.png"

# make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, injury_events_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# release the figure once it has been written to disk
fig.clf()
plt.close(fig)

print(f"Plot saved to {full_path}")


Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/injury_events_plot.png


# Code to Find Missed Games Due to Injury

In [5]:



import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# slice data set
# only identify players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# exclude those 'injuries' which are not relevant
# pd.read_csv converts the literal text 'n/a' to NaN by default, so the 'n/a'
# entry in the list can never match on its own; rows with a missing category
# are therefore excluded explicitly as well.
excluded_categories = ['healthy inactive','rest','sick','other','n/a']
is_excluded = injury_events_df['category'].isin(excluded_categories) | injury_events_df['category'].isna()
injury_events_df = injury_events_df[~is_excluded]

#------------------------Make plots-------------------------------------------

# sum the total missed games for each year
data = injury_events_df.groupby(['Year'])['Tot_games_missed'].sum()

# one colour per bar: grey for every year, red for the last (highest) year.
# Built from len(data) so the highlight stays on the last bar whatever the
# number of years in the data set.
bar_colors = ['dimgray'] * (len(data) - 1) + ['red']

# create plot
ax = data.plot(kind='bar', stacked=True, figsize=(15,10), color=bar_colors)

# Set the x-axis label
ax.set_xlabel("Year", fontsize = 16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Games Missed Due to Injury", fontsize =16,weight='bold')

# Set the x-axis tick labels
ax.set_xticklabels(data.index,rotation = 0, fontsize = 16)

#plot title
ax.set_title('Games Missed Due to Injury', fontsize=24, weight='bold')
#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
injury_events_figure = "missed_games_plot.png"

# make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, injury_events_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# release the figure once it has been written to disk
fig.clf()
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/missed_games_plot.png


# Code to Find Missed Games Due to Injury For Specific Positions

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# Add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# Slice data set to only include players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# Exclude irrelevant injury categories
# pd.read_csv converts the literal text 'n/a' to NaN by default, so the 'n/a'
# entry in the list can never match on its own; rows with a missing category
# are therefore excluded explicitly as well.
excluded_categories = ['healthy inactive','rest','sick','other','n/a']
is_excluded = injury_events_df['category'].isin(excluded_categories) | injury_events_df['category'].isna()
injury_events_df = injury_events_df[~is_excluded]

# Filter positions to include only PG, SG, SF, PF, C
# NOTE(review): this is an exact match on 'Pos', so any combined label
# (e.g. 'PG-SG', if the data contains such values) is dropped -- confirm intended.
valid_positions = ['PG', 'SG', 'SF', 'PF', 'C']
injury_events_df = injury_events_df[injury_events_df['Pos'].isin(valid_positions)]

# Sum total missed games per (year, position); unstack so each position
# becomes a column, with 0 for year/position pairs that have no rows
position_missed_games = injury_events_df.groupby(['Year', 'Pos'])['Tot_games_missed'].sum().unstack(fill_value=0)

#------------------------Make plots-------------------------------------------

# Create plot (one stacked bar per year, one segment per position)
ax = position_missed_games.plot(kind='bar', stacked=True, figsize=(15,10))

# Set the x-axis label
ax.set_xlabel("Year", fontsize=16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Games Missed Due to Injury", fontsize=16, weight='bold')

# Set the x-axis tick labels
ax.set_xticklabels(position_missed_games.index, rotation=0, fontsize=16)

# Plot title
ax.set_title('Games Missed Due to Injury by Position', fontsize=24, weight='bold')

#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
missed_games_position_figure = "missed_games_position_plot.png"

# Make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, missed_games_position_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# Release the figure once it has been written to disk
fig.clf()
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/missed_games_position_plot.png


# Code To Find Injuries At Specific Positions

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# slice data set
# only identify players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# exclude those 'injuries' which are not relevant
# pd.read_csv converts the literal text 'n/a' to NaN by default, so the 'n/a'
# entry in the list can never match on its own; rows with a missing category
# are therefore excluded explicitly as well.
excluded_categories = ['healthy inactive','rest','sick','other','n/a']
is_excluded = injury_events_df['category'].isin(excluded_categories) | injury_events_df['category'].isna()
# .copy() yields an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning
injury_events_df = injury_events_df[~is_excluded].copy()

# preprocess 'Pos' column to extract the primary position
# (the text before the first '-', e.g. 'PG-SG' -> 'PG')
injury_events_df['Primary_Position'] = injury_events_df['Pos'].str.split('-').str[0]

# exclude positions other than PG, SG, SF, PF, C
valid_positions = ['PG', 'SG', 'SF', 'PF', 'C']
injury_events_df = injury_events_df[injury_events_df['Primary_Position'].isin(valid_positions)]

#------------------------Make plots-------------------------------------------

# count injury events per (year, primary position); unstack so each position
# becomes a column, with 0 for year/position pairs that have no events
data = injury_events_df.groupby(['Year', 'Primary_Position']).size().unstack(fill_value=0)

# create plot (one stacked bar per year, one segment per position)
ax = data.plot(kind='bar', stacked=True, figsize=(15,10))

# Set the x-axis label
ax.set_xlabel("Year", fontsize = 16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Injury Events", fontsize =16,weight='bold')

# Set the x-axis tick labels
ax.set_xticklabels(data.index,rotation = 0, fontsize = 16)

#plot title
ax.set_title('Injury Events', fontsize=24, weight='bold')
#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
injury_events_figure = "injury_events_positions.png"

# make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, injury_events_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# release the figure once it has been written to disk
fig.clf()
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/injury_events_positions.png


# Code to Find The Specific Injuries That Occur At The Guard Position

In [12]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# Add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# Slice data set to only include players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# Exclude irrelevant injury categories
injury_events_df = injury_events_df[~injury_events_df['category'].isin(['healthy inactive','rest','sick','other','n/a'])]

# Filter for injuries occurring at the guard positions: 'Pos' starts with PG or SG.
# na=False treats a missing 'Pos' as "not a guard" instead of producing a NaN
# mask entry, which boolean indexing would reject.
guard_injuries_df = injury_events_df[injury_events_df['Pos'].str.startswith(('PG', 'SG'), na=False)]

# Group by year and injury type, then count occurrences.
# This must use the frame filtered just above; the cell previously read a
# differently named variable that it never defined, so it failed on a fresh
# kernel or silently plotted whatever another cell had left behind.
# Rows with a missing category are skipped by groupby (NaN keys are dropped by default).
guard_injury_counts = guard_injuries_df.groupby(['Year', 'category']).size().unstack(fill_value=0)

#------------------------Make plots-------------------------------------------

# Create plot (one stacked bar per year, one segment per injury category)
ax = guard_injury_counts.plot(kind='bar', stacked=True, figsize=(15, 10))

# Set the x-axis label
ax.set_xlabel("Year", fontsize=16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Injury Events", fontsize=16, weight='bold')

# Set the plot title
ax.set_title('Injury Events at the Guard Position by Year', fontsize=24, weight='bold')

#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
guard_injury_figure = "guard_injury_events_by_year_plot.png"

# Make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, guard_injury_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# Release the figure once it has been written to disk
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/guard_injury_events_by_year_plot.png


# Code to Find The Specific Injuries That Occur At The Forward Position

In [13]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load Files------------------------------------------

injury_events_df = pd.read_csv(injury_events_file_path)

#-------------------------Process Dataframe----------------------------------

# Add column for total (regular and post season) games missed
injury_events_df['Tot_games_missed'] = injury_events_df['Reg_games_missed'] + injury_events_df['Post_games_missed']

# Slice data set to only include players that averaged more than 10 minutes per game
injury_events_df = injury_events_df[injury_events_df['MPPG'] > 10.0]

# Exclude irrelevant injury categories
injury_events_df = injury_events_df[~injury_events_df['category'].isin(['healthy inactive','rest','sick','other','n/a'])]

# Filter for injuries occurring at the forward positions: 'Pos' starts with SF or PF.
# na=False treats a missing 'Pos' as "not a forward" instead of producing a NaN
# mask entry, which boolean indexing would reject.
forward_injuries_df = injury_events_df[injury_events_df['Pos'].str.startswith(('SF', 'PF'), na=False)]

# Group by year and injury type, then count occurrences.
# This must use the frame filtered just above; the cell previously read a
# differently named variable that it never defined, so it failed on a fresh
# kernel or silently plotted whatever another cell had left behind.
# Rows with a missing category are skipped by groupby (NaN keys are dropped by default).
forward_injury_counts = forward_injuries_df.groupby(['Year', 'category']).size().unstack(fill_value=0)

#------------------------Make plots-------------------------------------------

# Create plot (one stacked bar per year, one segment per injury category)
ax = forward_injury_counts.plot(kind='bar', stacked=True, figsize=(15, 10))

# Set the x-axis label
ax.set_xlabel("Year", fontsize=16, weight='bold')

# Set the y-axis label
ax.set_ylabel("Count of Injury Events", fontsize=16, weight='bold')

# Set the plot title
ax.set_title('Injury Events at the Forward Position by Year', fontsize=24, weight='bold')

#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
forward_injury_figure = "forward_injury_events_by_year_plot.png"

# Make sure the output folder exists; savefig does not create it
os.makedirs(save_directory, exist_ok=True)
full_path = os.path.join(save_directory, forward_injury_figure)

fig = ax.get_figure()
fig.savefig(full_path)

# Release the figure once it has been written to disk
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/forward_injury_events_by_year_plot.png


# Code to Find The Specific Injuries That Occur At The Center Position

In [14]:
import pandas as pd
import os
import matplotlib.pyplot as plt

#--------------------------User Inputs---------------------------------

injury_events_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/injury_movement_stats_merged.csv'

#-------------------------Load + Process Dataframe---------------------------

# Categories that do not count as injuries for this analysis
not_injury_categories = ['healthy inactive','rest','sick','other','n/a']

# Read the merged file, add the total (regular + post season) games missed,
# keep players averaging more than 10 minutes per game, and drop the
# non-injury categories -- written as one chain, one step per line
injury_events_df = (
    pd.read_csv(injury_events_file_path)
    .assign(Tot_games_missed=lambda frame: frame['Reg_games_missed'] + frame['Post_games_missed'])
    .loc[lambda frame: frame['MPPG'] > 10.0]
    .loc[lambda frame: ~frame['category'].isin(not_injury_categories)]
)

# Keep the rows whose position label begins with 'C' (the center position).
# The pg_ prefix on the names below is kept from the original cell even
# though this cell is about centers, not point guards.
starts_with_c = injury_events_df['Pos'].str.startswith('C')
pg_injuries_df = injury_events_df[starts_with_c]

# Events per (year, injury category), pivoted so each category is a column
# and year/category pairs without events show 0
events_per_year_and_category = pg_injuries_df.groupby(['Year', 'category']).size()
pg_injury_counts = events_per_year_and_category.unstack(fill_value=0)

#------------------------Make plots-------------------------------------------

# Stacked bar chart: one bar per year, one segment per injury category
ax = pg_injury_counts.plot.bar(stacked=True, figsize=(15, 10))

# Axis labels share one text style
axis_label_style = {'fontsize': 16, 'weight': 'bold'}
ax.set_xlabel("Year", **axis_label_style)
ax.set_ylabel("Count of Injury Events", **axis_label_style)

# Plot title
ax.set_title('Injury Events at the Center Position by Year', fontsize=24, weight='bold')

#----------------------Save plot---------------------------------------------

save_directory = '/Users/nya/NBA project/NBA Project/03/visuals'
pg_injury_figure = "center_injury_events_by_year_plot.png"
full_path = os.path.join(save_directory, pg_injury_figure)

# Write the figure to disk, then close it to free its resources
fig = ax.get_figure()
fig.savefig(full_path)
plt.close(fig)

print(f"Plot saved to {full_path}")

Plot saved to /Users/nya/NBA project/NBA Project/03/visuals/center_injury_events_by_year_plot.png
