In [1]:
import altair as alt
import pandas as pd
from altair import datum

In [2]:
# Load the NBA CSV from GitHub; `?raw=true` requests the raw file rather than
# the HTML page that wraps it.
url = 'https://github.com/asmdrk/eda/blob/main/NBA.csv?raw=true'

# Read the file and discard fully duplicated rows in one chain.
nba = (
    pd.read_csv(url)
    .drop_duplicates()
)

# Preview the first rows.
nba.head()

FileNotFoundError: [Errno 2] No such file or directory: 'data/NBA.csv'

In [None]:
# Columns kept for the rest of the analysis, grouped by theme.
include = [
    # player identity
    "name", "positions", "age", "team",
    # shooting
    "made_three_point_field_goals", "attempted_three_point_field_goals",
    "made_free_throws", "attempted_free_throws",
    # rebounding and playmaking
    "offensive_rebounds", "defensive_rebounds", "assists",
    # defense and mistakes
    "steals", "blocks", "turnovers", "personal_fouls",
    # scoring, season and field-goal percentage
    "points", "Year", "FG%",
]

# Restrict the frame to those columns (raises KeyError if any is missing).
nba = nba.loc[:, include]

In [None]:
# Print a concise summary of the frame: column names, dtypes and non-null counts.
nba.info()

In [None]:
# Show summary statistics (count, mean, spread, quartiles) for the columns
# that `describe` covers by default.
nba.describe()

In [None]:
# Turn off Altair's max-rows check so the full frame can be passed to a chart.
alt.data_transformers.disable_max_rows()

# Box plot of player age for each position; chart-level properties are given
# directly to the Chart constructor.
box_plot = (
    alt.Chart(
        nba,
        title='Distribution of age of NBA players by position',
        width=400,
        height=300,
    )
    .mark_boxplot()
    .encode(
        x=alt.X('positions:N', title='Position'),
        y=alt.Y('age:Q', title='Age'),
        tooltip=['name:N', 'age:Q'],
    )
)

# Last expression of the cell, so the notebook renders it.
box_plot


In [None]:
def position_rebounders(players, position):
    """Return the rows of ``players`` at ``position`` plus a ``Total_rebound`` column.

    ``Total_rebound`` is ``defensive_rebounds + offensive_rebounds``.

    ``assign`` builds a new frame, so ``players`` is left untouched and pandas
    does not emit ``SettingWithCopyWarning`` (which assigning a column on a
    boolean-filtered slice does).
    """
    at_position = players[players['positions'] == position]
    return at_position.assign(
        Total_rebound=at_position['defensive_rebounds'] + at_position['offensive_rebounds']
    )


def top_rebounders_chart(top_players, title):
    """Build a horizontal bar chart of ``Total_rebound`` per ``name``.

    NOTE(review): ``nba`` has a ``Year`` column, so the same player may appear
    in several rows; bars sharing a name would then be stacked. Confirm whether
    per-season labels are wanted.
    """
    return alt.Chart(top_players).mark_bar().encode(
        x=alt.X('Total_rebound:Q', title=' Total Rebounds'),
        y=alt.Y('name:N', sort='x', title='Player'),
        tooltip=['name', 'Total_rebound']
    ).properties(
        title=title
    )


# Point guards ranked by total (offensive + defensive) rebounds.
point_guards = position_rebounders(nba, 'POINT GUARD')
top_10_rebounders = point_guards.sort_values('Total_rebound', ascending=False).head(10)

# Shooting guards, ranked the same way. The previous version filtered on
# 'POINT GUARD' again and never used the result, so the second chart was an
# exact duplicate of the first.
# NOTE(review): assumes the label is spelled 'SHOOTING GUARD' in `positions`,
# mirroring 'POINT GUARD' -- confirm against nba['positions'].unique().
shooting_guards = position_rebounders(nba, 'SHOOTING GUARD')
top_10_sg_rebounders = shooting_guards.sort_values('Total_rebound', ascending=False).head(10)

# A cell only renders its last expression, so stack both charts into a single
# output instead of leaving the first one as an undisplayed mid-cell expression.
alt.vconcat(
    top_rebounders_chart(top_10_rebounders, 'Top 10 point guards by total rebounds'),
    top_rebounders_chart(top_10_sg_rebounders, 'Top 10 shooting guards by total rebounds'),
)

In [None]:
# Distinct values found in the `positions` column.
pos = nba['positions'].unique()

In [None]:
# Blocks per personal foul, computed on a new frame derived from `nba`.
# The previous version read and mutated a global `df` that no earlier cell
# defines, so the cell raised NameError on a fresh kernel.
block_foul = nba.assign(
    # Mask zero-foul rows so the ratio is missing there instead of inf/NaN from
    # a division by zero.
    block_foul_ratio=lambda players: (
        players['blocks']
        / players['personal_fouls'].where(players['personal_fouls'] > 0)
    )
)

# One scatter panel per position. `repeat` iterates over *column names*, so
# passing it the position values made the colour channel point at fields that
# do not exist; `facet` is the operator that splits rows by a column's values.
scatter = alt.Chart(block_foul).mark_point(opacity = 0.5, size = 5).encode(
    x=alt.X('block_foul_ratio:Q', axis=alt.Axis(title='Block-Foul Ratio')),
    # The y channel is the raw block count, so label it as such.
    y=alt.Y('blocks:Q', axis=alt.Axis(title='Blocks')),
    color=alt.Color('positions:N', legend=alt.Legend(title='Positions')),
    tooltip=['name', 'team', 'positions', 'block_foul_ratio', 'blocks']
).properties(
    width=700,
    height=700
).facet(
    row=alt.Row('positions:N', title='Position')
).properties(
    title='Players Good at Getting Blocks Without Fouling'
)

scatter


In [None]:
# Total three-point attempts per team and season.
# Aggregates `nba` directly: the previous version used a global `df` that is
# only assigned in a later cell (and there lacks the `team` column), so this
# cell failed on Restart & Run All.
teamwise_3pa = (
    nba.groupby(['team', 'Year'])['attempted_three_point_field_goals']
    .sum()
    .reset_index()
)

# One line per team showing how its three-point attempts change by season.
chart = alt.Chart(teamwise_3pa).mark_line().encode(
    x=alt.X('Year:O', title='Season'),
    y=alt.Y('attempted_three_point_field_goals:Q', title='Three-point attempts'),
    color='team:N'
).properties(
    title='Teamwise 3PA trend',
    width=700,
    height=700
)

chart

In [None]:
# Only the columns needed for the three-point leaderboard.
df = nba[['name', 'made_three_point_field_goals', 'Year']]

# Ten rows with the most made threes in each season, ordered by season and
# then by made threes (descending).
# Sorting followed by `groupby().head()` replaces
# `groupby('Year').apply(lambda x: x.nlargest(...))`: that form operates on the
# grouping column inside `apply`, which pandas 2.2 deprecates; once grouping
# columns are excluded, `reset_index(drop=True)` would discard `Year`.
df_top = (
    df.sort_values(['Year', 'made_three_point_field_goals'], ascending=[True, False])
    .groupby('Year')
    .head(10)
    .reset_index(drop=True)
)

# Stacked bars: one bar per season, one coloured segment per player.
chart = alt.Chart(df_top).mark_bar().encode(
    x=alt.X('Year:O', title='Season'),
    y=alt.Y('made_three_point_field_goals:Q', title='Made 3PM'),
    color=alt.Color('name:N'),
    tooltip=['name', 'made_three_point_field_goals']
).properties(
    title='Top 10 players with the Most Made 3PM per Season',
    width=700,
    height=700
)

chart