In [148]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly
from plotly.graph_objs import Figure, Histogram, Layout
from matplotlib import pyplot as plt
plt.style.use('ggplot')

In [31]:
nba = pd.read_csv('nba.csv', names = ['Player','Team','Total_games_played','minutes_per_game','offensive_rating','defensive_rating',\
                                     'overall_rating', 'win_contribution_index'])

In [16]:
nba.head()

Unnamed: 0,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index
0,"Chris Paul, PG",HOU,58,31.8,5.36,1.63,6.99,11.75
1,"Stephen Curry, PG",GS,51,32.0,6.59,0.06,6.65,10.97
2,"Nikola Jokic, C",DEN,75,32.5,4.02,1.95,5.97,14.03
3,"Anthony Davis, PF",NO,75,36.4,1.85,3.7,5.55,15.56
4,"Kyle Lowry, PG",TOR,78,32.2,4.15,1.03,5.18,13.59


# Which team has the most top 100 players? (best roster)

-We determine a player's value by his overall_rating

In [160]:
# First order the data set by overall_rating descending and groupy by the teams
top_players = nba.sort_values('overall_rating', ascending = False)
top_players = top_players.reindex(range(nba.shape[0]))
nba_team = top_players.loc[top_players.index < 100,].groupby('Team')
roster_stat = nba_team.overall_rating.count()
roster_stat = roster_stat.reset_index().sort_values('overall_rating', ascending = False)

In [161]:
roster_stat.columns = ['Team','Number of Top 100 Players']

In [168]:
roster_stat

Unnamed: 0,Team,Number of Top 100 Players
10,GS,9
27,TOR,8
11,HOU,7
28,UTAH,6
29,WSH,4
2,BOS,4
4,CLE,4
26,SA,4
25,POR,4
24,PHI,4


In [177]:
best_attackers = nba.sort_values('offensive_rating', ascending = False).reset_index()
best_attackers = best_attackers[best_attackers.index < 50]

In [178]:
best_attackers

Unnamed: 0,index,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index
0,20,"James Harden, PG",HOU,72,35.4,6.69,0.02,6.71,16.03
1,1,"Stephen Curry, PG",GS,51,32.0,6.59,0.06,6.65,10.97
2,25,"LeBron James, SF",CLE,82,36.9,5.64,-0.68,4.96,15.86
3,0,"Chris Paul, PG",HOU,58,31.8,5.36,1.63,6.99,11.75
4,26,"Damian Lillard, PG",POR,73,36.6,5.28,-0.38,4.9,13.67
5,10,"Kemba Walker, PG",CHA,80,34.2,4.36,-0.55,3.81,12.27
6,31,"Kevin Durant, SF",GS,68,34.2,4.26,-0.65,3.61,10.51
7,99,"Lou Williams, SG",LAC,79,32.8,4.25,-3.64,0.61,6.42
8,4,"Kyle Lowry, PG",TOR,78,32.2,4.15,1.03,5.18,13.59
9,2,"Nikola Jokic, C",DEN,75,32.5,4.02,1.95,5.97,14.03


In [119]:
best_defenders = nba.sort_values('defensive_rating', ascending = False).reset_index()
best_defenders = best_defenders[best_defenders.index < 50]

In [179]:
best_defenders

Unnamed: 0,index,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index
0,29,"Rudy Gobert, C",UTAH,56,32.4,-1.24,5.06,3.82,8.02
1,51,"Andre Roberson, SG",OKC,39,26.6,-2.57,4.34,1.77,3.16
2,23,"Robert Covington, SF",PHI,80,31.6,1.21,4.24,5.45,14.31
3,50,"Jusuf Nurkic, C",POR,79,26.4,-2.28,4.08,1.8,6.53
4,142,"Aron Baynes, C",BOS,81,18.3,-3.26,3.81,0.55,3.38
5,3,"Anthony Davis, PF",NO,75,36.4,1.85,3.7,5.55,15.56
6,43,"Ekpe Udoh, C",UTAH,63,12.9,-1.35,3.66,2.31,2.78
7,62,"David West, PF",GS,73,13.7,-1.25,3.63,2.38,3.51
8,71,"Dejounte Murray, PG",SA,81,21.5,-1.86,3.6,1.74,5.33
9,5,"Joel Embiid, C",PHI,63,30.3,1.51,3.59,5.1,10.13


In [122]:
best_both = set(best_attackers.Player).intersection(set(best_defenders.Player))

In [123]:
type(best_both)

set

In [124]:
list(best_both)

['Otto Porter Jr., SF',
 'Victor Oladipo, SG',
 'Anthony Davis, PF',
 'Joel Embiid, C',
 'Jimmy Butler, SG',
 'Tyus Jones, PG',
 'Jrue Holiday, PG']

In [132]:
nba[nba.Player.isin(list(best_both))]

Unnamed: 0,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index
3,"Anthony Davis, PF",NO,75,36.4,1.85,3.7,5.55,15.56
5,"Joel Embiid, C",PHI,63,30.3,1.51,3.59,5.1,10.13
6,"Otto Porter Jr., SF",WSH,77,31.6,2.91,2.05,4.96,12.51
7,"Tyus Jones, PG",MIN,82,17.9,2.38,2.4,4.78,7.27
21,"Jimmy Butler, SG",MIN,59,36.7,3.61,2.78,6.39,12.8
22,"Victor Oladipo, SG",IND,75,34.0,2.74,3.17,5.91,14.77
30,"Jrue Holiday, PG",NO,81,36.1,1.63,2.1,3.73,13.12


In [189]:
# Next we will find players who have the most discrepancy between their offense and defense
nba_diff = nba.assign(discrepancy = abs(nba.offensive_rating - nba.defensive_rating))
nba_diff = nba_diff.sort_values('discrepancy', ascending = False)
nba_diff = nba_diff.reset_index()
nba_diff.loc[nba_diff.index < 10]

Unnamed: 0,index,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index,discrepancy
0,99,"Lou Williams, SG",LAC,79,32.8,4.25,-3.64,0.61,6.42,7.89
1,142,"Aron Baynes, C",BOS,81,18.3,-3.26,3.81,0.55,3.38,7.07
2,51,"Andre Roberson, SG",OKC,39,26.6,-2.57,4.34,1.77,3.16,6.91
3,20,"James Harden, PG",HOU,72,35.4,6.69,0.02,6.71,16.03,6.67
4,144,"Hassan Whiteside, C",MIA,54,25.3,-3.04,3.53,0.49,3.18,6.57
5,1,"Stephen Curry, PG",GS,51,32.0,6.59,0.06,6.65,10.97,6.53
6,50,"Jusuf Nurkic, C",POR,79,26.4,-2.28,4.08,1.8,6.53,6.36
7,25,"LeBron James, SF",CLE,82,36.9,5.64,-0.68,4.96,15.86,6.32
8,29,"Rudy Gobert, C",UTAH,56,32.4,-1.24,5.06,3.82,8.02,6.3
9,134,"Eric Moreland, C",DET,67,12.0,-2.96,3.21,0.25,1.68,6.17


In [197]:
nba[['Name','Position']] = nba['Player'].str.split(',', expand=True)
nba

Unnamed: 0,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index,Name,Position
0,"Chris Paul, PG",HOU,58,31.8,5.36,1.63,6.99,11.75,Chris Paul,PG
1,"Stephen Curry, PG",GS,51,32.0,6.59,0.06,6.65,10.97,Stephen Curry,PG
2,"Nikola Jokic, C",DEN,75,32.5,4.02,1.95,5.97,14.03,Nikola Jokic,C
3,"Anthony Davis, PF",NO,75,36.4,1.85,3.70,5.55,15.56,Anthony Davis,PF
4,"Kyle Lowry, PG",TOR,78,32.2,4.15,1.03,5.18,13.59,Kyle Lowry,PG
5,"Joel Embiid, C",PHI,63,30.3,1.51,3.59,5.10,10.13,Joel Embiid,C
6,"Otto Porter Jr., SF",WSH,77,31.6,2.91,2.05,4.96,12.51,Otto Porter Jr.,SF
7,"Tyus Jones, PG",MIN,82,17.9,2.38,2.40,4.78,7.27,Tyus Jones,PG
8,"Giannis Antetokounmpo, PF",MIL,75,36.7,2.63,1.60,4.23,12.90,Giannis Antetokounmpo,PF
9,"Al Horford, PF",BOS,72,31.6,1.21,2.68,3.89,10.05,Al Horford,PF


In [222]:
nba.Position = nba.Position.str.strip()

In [225]:
nba_pg = nba.loc[nba.Position == 'PG']

In [233]:
nba_pg = nba_pg.sort_values('win_contribution_index', ascending = False)
nba_pg = nba_pg.reset_index()
nba_top_pg = nba_pg.loc[nba_pg.index < 20]
nba_top_pg

Unnamed: 0,level_0,index,Player,Team,Total_games_played,minutes_per_game,offensive_rating,defensive_rating,overall_rating,win_contribution_index,Name,Position
0,0,20,"James Harden, PG",HOU,72,35.4,6.69,0.02,6.71,16.03,James Harden,PG
1,1,24,"Russell Westbrook, PG",OKC,80,36.4,3.96,1.2,5.16,15.73,Russell Westbrook,PG
2,2,26,"Damian Lillard, PG",POR,73,36.6,5.28,-0.38,4.9,13.67,Damian Lillard,PG
3,3,4,"Kyle Lowry, PG",TOR,78,32.2,4.15,1.03,5.18,13.59,Kyle Lowry,PG
4,4,30,"Jrue Holiday, PG",NO,81,36.1,1.63,2.1,3.73,13.12,Jrue Holiday,PG
5,5,10,"Kemba Walker, PG",CHA,80,34.2,4.36,-0.55,3.81,12.27,Kemba Walker,PG
6,6,0,"Chris Paul, PG",HOU,58,31.8,5.36,1.63,6.99,11.75,Chris Paul,PG
7,7,1,"Stephen Curry, PG",GS,51,32.0,6.59,0.06,6.65,10.97,Stephen Curry,PG
8,8,17,"Ben Simmons, PG",PHI,81,33.7,1.44,1.45,2.89,10.89,Ben Simmons,PG
9,9,40,"Eric Bledsoe, PG",MIL/PHX,74,31.4,2.49,0.06,2.55,8.45,Eric Bledsoe,PG


In [236]:
from plotly.graph_objs import Scatter


data = [Scatter(x=nba_top_pg['offensive_rating'], y=nba_top_pg['defensive_rating'], mode = 'markers')]#, text=df['movie_title'])]
layout = Layout(title="Offense vs Defense for Top 20 NBA PGs")

fig = Figure(data=data, layout=layout)

plotly.offline.iplot(fig, show_link=False)