In [7]:
import pandas as pd
import plotly.express as px
from google.colab import files
import plotly.graph_objects as go




In [None]:
# Calls google.colab's files.upload() and keeps its return value in `uploaded`.
# Presumably this is how 'nba_drafts_data.csv' (read in the next cell) reaches
# the runtime — TODO confirm. Requires the google.colab package imported above.
uploaded = files.upload()

In [2]:
# Load the draft dataset from a CSV addressed by a relative path (so it must sit
# in the current working directory).
# NOTE(review): the rendered output shows an 'Unnamed: 0' column, which looks like
# an index that was saved into the CSV; index_col=0 would likely drop it — confirm.
df = pd.read_csv('nba_drafts_data.csv')
# Bare expression: shows the frame as this cell's output.
df

Unnamed: 0,Pk,Tm,Player,College,Yrs,G,MP,PTS,TRB,AST,...,FT%,MPperG,PTSperG,RBperG,ASTperG,WS,WS/48,BPM,VORP,Year
0,3,PHW,Paul Arizin,Villanova,10.0,713.0,24897.0,16266.0,6129.0,1665.0,...,0.810,38.4,22.8,8.6,2.3,108.8,0.183,,,1950
1,1,BOS,Chuck Share,Bowling Green,9.0,596.0,13023.0,4928.0,4986.0,809.0,...,0.693,21.9,8.3,8.4,1.4,42.0,0.155,,,1950
2,2,BLB,Don Rehfeldt,Wisconsin,2.0,98.0,788.0,692.0,494.0,118.0,...,0.758,20.2,7.1,5.0,1.2,3.5,0.048,,,1950
3,4,TRI,Bob Cousy,Holy Cross,14.0,924.0,30165.0,16960.0,4786.0,6955.0,...,0.803,35.3,18.4,5.2,7.5,91.1,0.139,,,1950
4,5,WSC,Dick Schnittker,Ohio State,6.0,364.0,6744.0,3030.0,1372.0,479.0,...,0.825,20.1,8.3,3.8,1.3,22.0,0.132,,,1950
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6975,56,TOR,Edin Bavčić,,,,0.0,,,,...,,,,,,,,,,2006
6976,57,MIN,Loukas Mavrokefalidis,,,,0.0,,,,...,,,,,,,,,,2006
6977,58,DAL,J.R. Pinnock,George Washington,,,0.0,,,,...,,,,,,,,,,2006
6978,59,SAS,Damir Markota,,1.0,30.0,170.0,51.0,31.0,6.0,...,0.636,5.7,1.7,1.0,0.2,-0.2,-0.070,-8.5,-0.3,2006


In [3]:
# The row count of the table is used as the number of drafted players.
total_players = df.shape[0]

# A minutes total of exactly zero marks a player who never appeared; rows whose
# MP is missing compare as False and are therefore not counted here.
zero_minutes = df['MP'] == 0.0
non_players = zero_minutes.sum()

In [4]:
# Two-row summary table: drafted players split by whether they logged NBA minutes.
played_count = total_players - non_players
data = dict(
    Category=['Played in NBA', 'Did Not Play in NBA'],
    Count=[played_count, non_players],
)

df_plot = pd.DataFrame(data)


**Let's take a look at how many drafted players actually played at least 1 minute in the NBA**

In [5]:
# Bar chart of the played / did-not-play summary held in df_plot.
fig = px.bar(
    df_plot,
    x='Category',
    y='Count',
    title='Number of NBA Drafted Players Who Played vs. Who Did Not Play (drafts 1950-2006)',
    labels={'Category': 'NBA Playing Status', 'Count': 'Number of Players'},
)

fig.show()

**And now let's take a look at the number of draft picks in every NBA draft**

In [6]:
# Picks per draft class: count the rows for each Year, ordered chronologically.
draft_counts = df['Year'].value_counts().sort_index()

# Flatten the counts Series into the two-column frame Plotly Express plots from.
df_plot = (
    draft_counts
    .rename_axis('Year')
    .reset_index(name='Number of Drafted Players')
)

fig = px.bar(
    df_plot,
    x='Year',
    y='Number of Drafted Players',
    title='Number of Players Drafted by Year (1950-2006)',
    labels={'Number of Drafted Players': 'Number of Players', 'Year': 'Draft Year'},
)

fig.show()

**Let's add the number of players who actually played in the NBA to the plot**

In [11]:
# Compare, per draft year, how many players were drafted with how many of them
# have at least one recorded NBA game.
# plotly.graph_objects is already imported as `go` in the notebook's import cell,
# so it is not re-imported here.

draft_counts = df['Year'].value_counts().sort_index()

# "Played" here means a games total above zero; rows with a missing G compare as
# False and are left out. Reindexing on the full list of draft years makes a
# year with no such players show up as 0 instead of disappearing from the axis.
played_counts = (
    df.loc[df['G'] > 0, 'Year']
    .value_counts()
    .sort_index()
    .reindex(draft_counts.index, fill_value=0)
)


fig = go.Figure()
fig.add_trace(go.Bar(
    x=draft_counts.index,
    y=draft_counts.values,
    name='Drafted Players',
    marker_color='blue'
))

# Added second so it is drawn over the "drafted" bars in overlay mode.
fig.add_trace(go.Bar(
    x=played_counts.index,
    y=played_counts.values,
    name='Players Played in NBA',
    marker_color='green'
))

fig.update_layout(
    title='Number of Players Drafted vs Played in NBA by Year (1950-2006)',
    xaxis_title='Draft Year',
    yaxis_title='Number of Players',
    barmode='overlay',  # draw both traces at the same x position for direct comparison
    bargap=0.15  # gap between bars at adjacent x positions (fraction of the slot)
)

fig.show()


**Now let's find out what was the lowest draft pick to ever play in the NBA**

In [22]:
# Coerce the pick column to numbers; entries that cannot be parsed become NaN.
# This overwrites df['Pk'] in place, so cells that later compare Pk numerically
# see the converted column.
df['Pk'] = pd.to_numeric(df['Pk'], errors='coerce')

# Players with at least one recorded game.
played_in_nba = df.loc[df['G'] > 0]

# Largest pick number among them, i.e. the latest selection that still played.
lowest_draft_pick = played_in_nba['Pk'].max()

# Shown as a plain integer (the pick appears as 224.0 in the next cell's table).
int(lowest_draft_pick)

224

In [24]:
# Full record(s) of whoever was taken at that latest pick number and still played.
player_info = played_in_nba.loc[played_in_nba['Pk'] == lowest_draft_pick]
player_info

Unnamed: 0,Pk,Tm,Player,College,Yrs,G,MP,PTS,TRB,AST,...,FT%,MPperG,PTSperG,RBperG,ASTperG,WS,WS/48,BPM,VORP,Year
2517,224.0,DET,Harvey Marlatt,Eastern Michigan,3.0,61.0,746.0,225.0,86.0,94.0,...,0.85,12.2,3.7,1.4,1.5,-0.1,-0.007,,,1970


**What about the best scorers who were drafted later than the 30th pick?**

In [38]:
# Everyone selected after the 30th pick. The comparison is numeric, so it relies
# on the earlier cell that converts Pk with pd.to_numeric.
players_above_30_pick = df.loc[df['Pk'] > 30]

# The 20 largest point totals among them.
# NOTE(review): the chart title says "10k+ PTS", but no filter here enforces
# that threshold — confirm it holds for all 20 selected rows.
top_scorers = (
    players_above_30_pick
    .sort_values(by='PTS', ascending=False)
    .head(20)
)

axis_labels = {'Pk': 'Draft Pick Number', 'Year': 'Draft Year', 'PTS': 'Points Scored'}

# Bubble chart: position encodes pick and draft year, bubble size and colour
# both encode the points total.
fig = px.scatter(
    top_scorers,
    x='Pk',
    y='Year',
    size='PTS',
    color='PTS',
    hover_name='Player',
    size_max=60,
    title='Top 20 Scorers (10k+ PTS) Drafted Below 30th Pick',
    labels=axis_labels,
)

fig.update_layout(xaxis_title="Draft Pick Number", yaxis_title="Draft Year")

fig.show()

**Now let's look at the distribution of high scorers (number of players with 10k+ total points at each draft pick)**

In [30]:
# Players whose points total exceeds 10,000.
high_scorers = df[df['PTS'] > 10000]

# How many of them were taken at each pick number, ordered by pick.
scorers_by_pick = high_scorers['Pk'].value_counts().sort_index()

df_plot = pd.DataFrame({
    'Draft Pick': scorers_by_pick.index,
    'Number of Players': scorers_by_pick.values,
})

fig = px.bar(
    df_plot,
    x='Draft Pick',
    y='Number of Players',
    title='Players Scoring Over 10,000 Points by Draft Pick',
    labels={'Number of Players': 'Number of High Scorers', 'Draft Pick': 'Draft Pick Number'},
)

fig.show()

**Now let's take a look at the colleges**

In [36]:
# Drafted players per college, most-represented first (value_counts leaves out
# rows whose College is missing).
college_counts = df['College'].value_counts()

# Keep only colleges with more than 50 drafted players.
filtered_colleges = college_counts[college_counts > 50]

# Per-college count of players above 10,000 points.
# NOTE: this rebinds `high_scorers`, which the per-pick cell uses for a DataFrame.
high_scorers = df.loc[df['PTS'] > 10000, 'College'].value_counts()

# Align the high-scorer counts to the filtered colleges in one vectorized step;
# a college with no such player gets 0. This replaces a per-element membership
# test and matches the reindex(fill_value=0) idiom used for the per-year counts.
high_scorer_counts = high_scorers.reindex(filtered_colleges.index, fill_value=0)

data = {
    'College': filtered_colleges.index,
    'Number of Drafted Players': filtered_colleges.values,
    'High Scorers': high_scorer_counts.values
}
df_plot = pd.DataFrame(data)

# Bar height is the number of drafted players; bar colour is the number of
# 10k+ scorers from that college.
fig = px.bar(df_plot, x='College', y='Number of Drafted Players',
             color='High Scorers',
             title='Universities with the Most NBA Drafted Players (More than 50 players)',
             labels={'College': 'University', 'Number of Drafted Players': 'Number of Players', 'High Scorers': 'Number of High Scorers (10k+ PTS)'})

fig.show()

**Which teams have had the most top-3 picks in draft history?**

In [37]:
# One value_counts Series per pick slot: index is the `Tm` value, value is how
# many times that team held the pick.
first_picks, second_picks, third_picks = (
    df.loc[df['Pk'] == pick, 'Tm'].value_counts() for pick in (1, 2, 3)
)

# One bar trace per pick slot, added in 1st / 2nd / 3rd order.
fig = go.Figure()
for trace_name, team_counts in (
    ('1st Picks', first_picks),
    ('2nd Picks', second_picks),
    ('3rd Picks', third_picks),
):
    fig.add_trace(go.Bar(name=trace_name, x=team_counts.index, y=team_counts.values))

fig.update_layout(
    barmode='group',
    title='NBA Teams with the Most Top 3 Draft Picks',
    xaxis_title='Teams',
    yaxis_title='Number of Picks',
)

fig.show()