## Import data

First, the data with the players' stats is imported.

In [1]:
import pandas as pd
import numpy as np
import re

# Load the players' stats into the main DataFrame and preview the first rows.
df = pd.read_csv('stats_2019.csv')

df.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Alex Abrines,SG,25,OKC,31,2,19.0,1.8,5.1,...,0.923,0.2,1.4,1.5,0.6,0.5,0.2,0.5,1.7,5.3
1,2,Quincy Acy,PF,28,PHO,10,0,12.3,0.4,1.8,...,0.7,0.3,2.2,2.5,0.8,0.1,0.4,0.4,2.4,1.7
2,3,Jaylen Adams,PG,22,ATL,34,1,12.6,1.1,3.2,...,0.778,0.3,1.4,1.8,1.9,0.4,0.1,0.8,1.3,3.2
3,4,Steven Adams,C,25,OKC,80,80,33.4,6.0,10.1,...,0.5,4.9,4.6,9.5,1.6,1.5,1.0,1.7,2.6,13.9
4,5,Bam Adebayo,C,21,MIA,82,28,23.3,3.4,5.9,...,0.735,2.0,5.3,7.3,2.2,0.9,0.8,1.5,2.5,8.9


## Cleaning and converting the data

Every row that merely repeats the column names needs to be deleted.
Then every column other than Rk, Player, Pos and Tm is converted to numeric.

In [2]:
# Drop the repeated header rows: they hold the literal string 'Player'
# in the Player column. The filter is evaluated once and reused for the drop.
rows = df.loc[df['Player'] == 'Player']
df = df.drop(rows.index)

# Convert every column from position 5 onwards to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
cols = df.columns.values[5:]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')

# Age is converted separately and without errors='coerce',
# so an unparseable Age value raises instead of silently becoming NaN.
df['Age'] = pd.to_numeric(df['Age'])
df.dtypes

Rk         object
Player     object
Pos        object
Age         int64
Tm         object
G           int64
GS          int64
MP        float64
FG        float64
FGA       float64
FG%       float64
3P        float64
3PA       float64
3P%       float64
2P        float64
2PA       float64
2P%       float64
eFG%      float64
FT        float64
FTA       float64
FT%       float64
ORB       float64
DRB       float64
TRB       float64
AST       float64
STL       float64
BLK       float64
TOV       float64
PF        float64
PTS       float64
dtype: object

## Sorting values

In [3]:
# Rank all players by PTS, highest first, and show the top 20.
scorers = df.sort_values(by='PTS', ascending=False)
scorers.head(20)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
277,206,James Harden,PG,29,HOU,78,78,36.8,10.8,24.5,...,0.879,0.8,5.8,6.6,7.5,2.0,0.7,5.0,3.1,36.1
249,183,Paul George,SF,28,OKC,77,77,36.9,9.2,21.0,...,0.839,1.4,6.8,8.2,4.1,2.2,0.4,2.7,2.8,28.0
19,18,Giannis Antetokounmpo,PF,24,MIL,72,72,32.8,10.0,17.3,...,0.729,2.2,10.3,12.5,5.9,1.3,1.5,3.7,3.2,27.7
207,155,Joel Embiid,C,24,PHI,64,64,33.7,9.1,18.7,...,0.804,2.5,11.1,13.6,3.7,0.7,1.9,3.5,3.3,27.5
344,258,LeBron James,SF,34,LAL,55,55,35.2,10.1,19.9,...,0.665,1.0,7.4,8.5,8.3,1.3,0.6,3.6,1.7,27.4
165,124,Stephen Curry,PG,30,GSW,69,69,33.8,9.2,19.4,...,0.916,0.7,4.7,5.3,5.2,1.3,0.4,2.8,2.4,27.3
71,61,Devin Booker,SG,22,PHO,64,64,35.0,9.2,19.6,...,0.866,0.6,3.5,4.1,6.8,0.9,0.2,4.1,3.1,26.6
412,301,Kawhi Leonard,SF,27,TOR,60,60,34.0,9.3,18.8,...,0.854,1.3,6.0,7.3,3.3,1.8,0.4,2.0,1.5,26.6
198,150,Kevin Durant,PF,30,GSW,78,78,34.6,9.2,17.7,...,0.885,0.4,5.9,6.4,5.9,0.7,1.1,2.9,2.0,26.0
167,126,Anthony Davis,C,25,NOP,56,56,33.0,9.5,18.3,...,0.794,3.1,8.9,12.0,3.9,1.6,2.4,2.0,2.4,25.9


## Adding totals

In [4]:
# Totals = PTS + TRB + AST + BLK + STL - TOV
positive_stats = df[['PTS', 'TRB', 'AST', 'BLK', 'STL']].sum(axis=1)
df['Totals'] = positive_stats - df['TOV']

# Rank players by the combined figure, highest first.
totals = df.sort_values(by='Totals', ascending=False)

totals.head(20)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Totals
277,206,James Harden,PG,29,HOU,78,78,36.8,10.8,24.5,...,0.8,5.8,6.6,7.5,2.0,0.7,5.0,3.1,36.1,47.9
19,18,Giannis Antetokounmpo,PF,24,MIL,72,72,32.8,10.0,17.3,...,2.2,10.3,12.5,5.9,1.3,1.5,3.7,3.2,27.7,45.2
207,155,Joel Embiid,C,24,PHI,64,64,33.7,9.1,18.7,...,2.5,11.1,13.6,3.7,0.7,1.9,3.5,3.3,27.5,43.9
167,126,Anthony Davis,C,25,NOP,56,56,33.0,9.5,18.3,...,3.1,8.9,12.0,3.9,1.6,2.4,2.0,2.4,25.9,43.8
700,506,Russell Westbrook,PG,30,OKC,73,73,36.0,8.6,20.2,...,1.5,9.6,11.1,10.7,1.9,0.5,4.5,3.4,22.9,42.6
344,258,LeBron James,SF,34,LAL,55,55,35.2,10.1,19.9,...,1.0,7.4,8.5,8.3,1.3,0.6,3.6,1.7,27.4,42.5
249,183,Paul George,SF,28,OKC,77,77,36.9,9.2,21.0,...,1.4,6.8,8.2,4.1,2.2,0.4,2.7,2.8,28.0,40.2
670,480,Karl-Anthony Towns,C,23,MIN,77,77,33.1,8.8,17.1,...,3.4,9.0,12.4,3.4,0.9,1.6,3.1,3.8,24.4,39.6
412,301,Kawhi Leonard,SF,27,TOR,60,60,34.0,9.3,18.8,...,1.3,6.0,7.3,3.3,1.8,0.4,2.0,1.5,26.6,37.4
366,269,Nikola Jokic,C,23,DEN,80,80,31.3,7.7,15.1,...,2.9,8.0,10.8,7.3,1.4,0.7,3.1,2.9,20.1,37.2


## More filtering and looking for interesting stats

In [5]:
# Top Bricks: players with PTS > 10 and 3PA > 2 whose 3P% is below 0.30,
# ordered by 3PA (highest first).

cols = ['Player', 'Tm','G', 'PTS','FG', 'FGA', 'FG%', '3P', '3PA', '3P%']
is_brick = (df['PTS'] > 10) & (df['3PA'] > 2) & (df['3P%'] < 0.30)
temp = df.loc[is_brick]
sort_temp = temp.sort_values(by='3PA', ascending=False).reset_index(drop=True)
# Re-label the rows 1..n so the index reads as a rank.
sort_temp.index = np.arange(1, len(sort_temp) + 1)
sort_temp[cols]

Unnamed: 0,Player,Tm,G,PTS,FG,FGA,FG%,3P,3PA,3P%
1,Russell Westbrook,OKC,73,22.9,8.6,20.2,0.428,1.6,5.6,0.29
2,Dennis Smith,NYK,21,14.7,5.8,14.0,0.413,1.1,4.0,0.289
3,J.J. Barea,DAL,38,10.9,4.2,10.1,0.418,1.0,3.4,0.297
4,DeMarcus Cousins,GSW,30,16.3,5.9,12.4,0.48,0.9,3.2,0.274
5,Jabari Parker,WAS,25,15.0,6.0,11.4,0.523,1.0,3.2,0.296
6,Delon Wright,MEM,26,12.2,4.4,10.2,0.434,0.8,3.0,0.256
7,Giannis Antetokounmpo,MIL,72,27.7,10.0,17.3,0.578,0.7,2.8,0.256
8,Christian Wood,NOP,8,16.9,6.1,11.5,0.533,0.8,2.6,0.286
9,Serge Ibaka,TOR,74,15.0,6.3,11.9,0.529,0.7,2.3,0.29


In [6]:
# Players with PTS > 10 and 3PA > 2 whose 3P% is above 0.44,
# ordered by 3P% (highest first).
cols = ['Player', 'Tm','PTS','FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%']
is_sharpshooter = (df['PTS'] > 10) & (df['3PA'] > 2) & (df['3P%'] > 0.44)
temp = df.loc[is_sharpshooter]
sort_temp = temp.sort_values(by='3P%', ascending=False).reset_index(drop=True)
# Re-label the rows 1..n so the index reads as a rank.
sort_temp.index = np.arange(1, len(sort_temp) + 1)
sort_temp[cols]

Unnamed: 0,Player,Tm,PTS,FG,FGA,FG%,2P,2PA,2P%,3P,3PA,3P%
1,Otto Porter,CHI,17.5,6.5,13.4,0.483,3.9,8.1,0.479,2.6,5.3,0.488
2,Joe Harris,BRK,13.7,4.9,9.8,0.5,2.5,4.8,0.528,2.4,5.1,0.474
3,George Hill,CLE,10.8,4.2,8.2,0.514,3.2,6.1,0.532,1.0,2.2,0.464
4,Danny Green,TOR,10.3,3.7,7.9,0.465,1.2,2.4,0.487,2.5,5.4,0.455
5,Landry Shamet,LAC,10.9,3.5,8.4,0.414,0.8,2.4,0.322,2.7,6.0,0.45


In [7]:
sc = pd.read_csv('steph_2018.csv')
# Rows to discard: repeated header lines (Rk == 'Rk') and rows marked 'Inactive' in GS.
is_header = sc['Rk'] == 'Rk'
is_inactive = sc['GS'] == 'Inactive'
row = sc.loc[is_header | is_inactive]
sc = sc.drop(index=row.index)
sc

Unnamed: 0,Rk,G,Date,Age,Tm,Unnamed: 5,Opp,Unnamed: 7,GS,MP,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,1,2017-10-17,29-217,GSW,,HOU,L (-1),1,29:33,...,5,5,4,1,0,2,4,22,14.3,9
1,2,2,2017-10-20,29-220,GSW,@,NOP,W (+8),1,35:07,...,3,3,8,0,0,1,1,28,24.7,7
2,3,3,2017-10-21,29-221,GSW,@,MEM,L (-10),1,30:05,...,6,6,3,0,0,2,5,37,28.6,-6
3,4,4,2017-10-23,29-223,GSW,@,DAL,W (+30),1,31:09,...,0,2,8,4,0,6,1,29,25.9,22
4,5,5,2017-10-25,29-225,GSW,,TOR,W (+5),1,37:31,...,4,4,5,3,1,2,2,30,25.2,14
5,6,6,2017-10-27,29-227,GSW,,WAS,W (+3),1,36:12,...,5,5,8,2,0,2,2,20,15.8,-8
6,7,7,2017-10-29,29-229,GSW,,DET,L (-8),1,35:26,...,5,6,8,1,0,5,3,27,21.7,12
7,8,8,2017-10-30,29-230,GSW,@,LAC,W (+28),1,29:54,...,3,5,6,2,1,1,3,31,31.0,23
8,9,9,2017-11-02,29-233,GSW,@,SAS,W (+20),1,31:51,...,7,8,5,3,0,2,2,21,21.2,16
9,10,10,2017-11-04,29-235,GSW,@,DEN,W (+19),1,29:38,...,0,0,11,2,0,3,0,22,21.3,44
