Import Libraries and choose style(s) for our future plots.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
# Select matplotlib's 'ggplot' style sheet for the figures drawn after this call.
plt.style.use('ggplot')

Step 0: Import and Read Data

In [None]:
# Load the playoff stats table from a comma-separated file.
# NOTE(review): the path is absolute and machine-specific; the notebook only
# runs as-is on the machine that has this file.
df_playoffs = pd.read_csv(
    r'C:\Users\Kerin B\Desktop\Datasets\nba_playoffs_2023.csv',
    sep=',',
)

Step 1: Understanding Our Data
- DataFrame shape
- Head and tail of the data
- Column names and dtypes

In [None]:
# (number of rows, number of columns) of the raw table.
df_playoffs.shape

In [None]:
# Preview the first rows (head() defaults to five).
df_playoffs.head()

In [None]:
# Preview the last five rows.
df_playoffs.tail(5)

In [None]:
# List the column labels available in the raw table.
df_playoffs.columns

In [None]:
# Show the dtype pandas assigned to each column.
df_playoffs.dtypes

Step 2: Data Preparation and Cleaning
- Checking for players listed on more than one row and keeping only their combined 'TOT' row
- Removing columns that are not needed for the analysis

In [None]:
# Count how many rows each player name occupies, then collect the names that
# occupy more than one row.
counts = df_playoffs['Player'].value_counts()
players_to_drop = counts.loc[counts.gt(1)].index
# Team code used below to recognise the row to keep for those players.
total = 'TOT'
players_to_drop

In [None]:
# Keep a row when its player is not in players_to_drop, or when its team code
# equals `total`; i.e. discard only the non-'TOT' rows of multi-row players.
filtered_df = df_playoffs.loc[
    ~df_playoffs['Player'].isin(players_to_drop) | df_playoffs['Tm'].eq(total)
]
filtered_df.shape

In [None]:
# Compare the number of non-null Player entries left after filtering with the
# number of distinct players in the raw table.
number_of_players = filtered_df['Player'].count()
unique_players = df_playoffs['Player'].nunique()
print(
    'Number of Entries:',
    number_of_players,
    '-- Number of Unique Players:',
    unique_players,
)

In [None]:
# Flag every repeat occurrence of a player name (the first occurrence is not
# flagged) and show the flagged rows.
duplicates_mask = filtered_df['Player'].duplicated(keep='first')
duplicates_df = filtered_df.loc[duplicates_mask]
print("Duplicate Rows:")
duplicates_df

In [None]:
# Build a new frame without the columns that the rest of the notebook does not use.
filtered_df_new = filtered_df.drop(columns=['Rk', 'Age', 'Tm', 'GS'])

In [None]:
# Use the player name as the row index; later cells read it for plot labels.
filtered_df_new = filtered_df_new.set_index('Player')

Step 3: Creating New Columns
- MVP Eligibility based on Games Played and Average Minutes Played
- Drop Rows of Players who are not MVP Eligible
- Total Offensive (PTS + AST + ORB) and Total Defensive (STL + BLK + DRB) Stats

In [None]:
# Thresholds a player must reach on the 'G' and 'MP' columns to be marked eligible.
criteria_games = 14
criteria_minutes = 20
# 'Yes' only when both thresholds are met, otherwise 'No'.
filtered_df_new['Playoff MVP Eligibility'] = np.where(
    filtered_df_new['G'].ge(criteria_games)
    & filtered_df_new['MP'].ge(criteria_minutes),
    'Yes',
    'No',
)

In [None]:
# Keep only the rows not marked 'No' for eligibility.
# .copy() makes the result an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning.
df_playoff_new = filtered_df_new[filtered_df_new['Playoff MVP Eligibility'] != 'No'].copy()

In [None]:
# Add combined offensive (PTS + AST + ORB) and defensive (STL + BLK + DRB)
# columns. assign() returns a new frame, so the columns are never written
# onto a filtered slice (which would raise SettingWithCopyWarning).
df_playoff_new = df_playoff_new.assign(
    Tot_OFF=df_playoff_new['PTS'] + df_playoff_new['AST'] + df_playoff_new['ORB'],
    Tot_DEF=df_playoff_new['STL'] + df_playoff_new['BLK'] + df_playoff_new['DRB'],
)

In [None]:
# Preview the first five rows of the eligible-player table.
df_playoff_new.head(5)

Step 4: Feature Relationships; Data Exploration

In [None]:
# Number of rows per value of the 'Pos' column.
position_counts = df_playoff_new['Pos'].value_counts()

# One bar per position, drawn on an 8x6 figure.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(position_counts.index, position_counts.values, color='blue')

ax.set_xlabel('Position')
ax.set_ylabel('Number of Playoff MVP Eligible Players')
ax.set_title('Distribution of MVP Eligible Player Positions')
plt.show()


In [None]:
# Ten rows with the highest 'PTS', in descending order.
df_playoff_top = df_playoff_new.sort_values('PTS', ascending=False).iloc[:10]
# Bar chart of their 'PTS' values, one bar per index label.
df_playoff_top['PTS'].plot.bar(
    title='Top 10 Scorers (Players)',
    xlabel='Player',
    ylabel='Points Per Game',
)

In [None]:
# Columns shown in the table below the chart.
pointsfg = ['PTS', 'FG', 'FGA', 'FG%', 'eFG%']

ax = df_playoff_top.plot.scatter(
    x='FGA',
    y='FG',
    title='Field Goal Efficiency of Top Scorers (Players)',
)
# Under each point, stack three labels: the row's index label, then FG% in
# red, then eFG% in blue.
for player, stats in df_playoff_top.iterrows():
    point = (stats['FGA'], stats['FG'])
    ax.annotate(player, point, textcoords="offset points", xytext=(5, -10), ha='center')
    ax.annotate(stats['FG%'], point, textcoords="offset points", xytext=(5, -20), ha='center', color='red')
    ax.annotate(stats['eFG%'], point, textcoords="offset points", xytext=(5, -30), ha='center', color='blue')

# Empty scatters exist only to give the legend a colour key for the labels.
ax.scatter([], [], marker='o', label='eFG%', color='blue', alpha=1)
ax.scatter([], [], marker='o', label='FG%', color='red', alpha=1)

ax.legend()

plt.show()
df_playoff_top[pointsfg]


In [None]:
# Columns shown in the table below the chart.
points2 = ['PTS', '2P', '2PA', '2P%']

ax = df_playoff_top.plot.scatter(
    x='2PA',
    y='2P',
    title='2 Pointers of Top Scorers (Players)',
)
# Label each point with the row's index label and, below it in red, its 2P%.
for player, stats in df_playoff_top.iterrows():
    point = (stats['2PA'], stats['2P'])
    ax.annotate(player, point, textcoords="offset points", xytext=(5, -10), ha='center')
    ax.annotate(stats['2P%'], point, textcoords="offset points", xytext=(5, -20), ha='center', color='red')

# Empty scatter exists only to give the legend a colour key for the red label.
ax.scatter([], [], marker='o', label='2P%', color='red', alpha=1)

ax.legend()

plt.show()
df_playoff_top[points2]


In [None]:
# Columns shown in the table below the chart.
points3 = ['PTS', '3P', '3PA', '3P%']

ax = df_playoff_top.plot.scatter(
    x='3PA',
    y='3P',
    title='3 Pointers of Top Scorers (Players)',
)
# Label each point with the row's index label and, below it in red, its 3P%.
for player, stats in df_playoff_top.iterrows():
    point = (stats['3PA'], stats['3P'])
    ax.annotate(player, point, textcoords="offset points", xytext=(5, -10), ha='center')
    ax.annotate(stats['3P%'], point, textcoords="offset points", xytext=(5, -20), ha='center', color='red')

# Empty scatter exists only to give the legend a colour key for the red label.
ax.scatter([], [], marker='o', label='3P%', color='red', alpha=1)

ax.legend()

plt.show()
df_playoff_top[points3]


In [None]:
# Columns shown in the table below the chart.
points_STL_BLK_PF = ['PTS', 'STL', 'BLK', 'PF']

# One slot per player; each slot holds two bars of this width.
ind = np.arange(len(df_playoff_top))
width = 0.3
plt.figure(figsize=(20, 6))
# Left bar: steals with blocks stacked on top. Right bar: personal fouls.
# The pair is placed symmetrically around `ind` so the tick label sits under
# the middle of each player's group rather than under the PF bar alone.
plt.bar(ind - width / 2, df_playoff_top['STL'], width, label='STL', color='blue')
plt.bar(ind - width / 2, df_playoff_top['BLK'], width, label='BLK', color='red', bottom=df_playoff_top['STL'])
plt.bar(ind + width / 2, df_playoff_top['PF'], width, label='PF', color='green')

plt.xlabel('Player')
plt.ylabel('Values (Steals, Blocks, Fouls)')
plt.title('Steals and Blocks vs Personal Fouls of Top Players')
plt.xticks(ind, df_playoff_top.index)  # Set x-axis labels
plt.legend()
plt.show()

df_playoff_top[points_STL_BLK_PF]

In [None]:
# Columns shown in the table below the chart.
points_tov_ast = ['PTS', 'AST', 'TOV']

ax = df_playoff_top.plot.scatter(
    x='TOV',
    y='AST',
    title='Assists vs Turnovers of Top Players',
)
# Label each point with the row's index label.
for player, stats in df_playoff_top.iterrows():
    ax.annotate(
        player,
        (stats['TOV'], stats['AST']),
        textcoords="offset points",
        xytext=(5, -10),
        ha='center',
    )

plt.show()
df_playoff_top[points_tov_ast]


In [None]:
# Columns shown in the table below the chart.
rebounds = ['ORB', 'DRB', 'TRB']

# One stacked bar per player: offensive rebounds with defensive rebounds on top.
ind = np.arange(len(df_playoff_top))
width = 0.3
plt.figure(figsize=(20, 6))
# Bars are drawn at `ind`, the same positions as the tick labels, so each
# label sits directly under its bar.
plt.bar(ind, df_playoff_top['ORB'], width, label='ORB', color='blue')
plt.bar(ind, df_playoff_top['DRB'], width, label='DRB', color='red', bottom=df_playoff_top['ORB'])

plt.xlabel('Player')
plt.ylabel('Number of Rebounds')
plt.title('Offensive and Defensive Rebounds of Top Players')
plt.xticks(ind, df_playoff_top.index)  # Set x-axis labels
plt.legend()
plt.show()

# Last expression of the cell so the table is displayed; a bare expression
# in the middle of a cell is evaluated but never shown.
df_playoff_top[rebounds]

In [None]:
# Columns shown in the table below the chart.
off_def_stats = ['Tot_OFF', 'Tot_DEF']

ax = df_playoff_top.plot.scatter(
    x='Tot_DEF',
    y='Tot_OFF',
    title='Total Offensive and Defensive Stats of Top Players',
)
# Label each point with the row's index label.
for player, stats in df_playoff_top.iterrows():
    ax.annotate(
        player,
        (stats['Tot_DEF'], stats['Tot_OFF']),
        textcoords="offset points",
        xytext=(5, -10),
        ha='center',
    )

plt.show()
df_playoff_top[off_def_stats]

Step 5: Saving the Data as a File

In [None]:
# Save the top-scorer table. The player names live in the index (set earlier
# with set_index('Player')), so the index must be written or the file would
# contain stats with no player names.
df_playoff_top.to_csv('nba_playoff_stats_top.csv', index=True)

In [None]:
# Save the eligible-player table. The player names live in the index (set
# earlier with set_index('Player')), so the index must be written or the file
# would contain stats with no player names.
# NOTE(review): the filename now carries a '.csv' extension, matching the
# other export; update anything that reads the old extension-less name.
df_playoff_new.to_csv('nba_playoff_stats_playoffmvp_eligible.csv', index=True)