### Overview of the Dataset
The dataset contains aggregate per-player statistics for 67 NBA seasons. It includes basic box-score attributes such as points, assists, and rebounds. The data was originally scraped from Basketball Reference.

The dataset includes 24,691 observations across 51 columns (counted after the redundant row-number column and the `Year` column are dropped below).

Source: <https://www.kaggle.com/drgilermo/nba-players-stats/data>

In [12]:
import math
import warnings

from IPython.display import display
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# Display preferences.
%matplotlib inline
pd.options.display.float_format = '{:.3f}'.format

# Suppress annoying harmless error.
warnings.filterwarnings(
    action="ignore",
    module="scipy",
    message="^internal gelsd")

In [13]:
# Load the raw per-player season statistics from the CSV that sits next to
# this notebook.
player_stats = pd.read_csv(filepath_or_buffer='Seasons_Stats.csv')

In [14]:
# Peek at the first five rows to sanity-check the load.
player_stats.head(n=5)

Unnamed: 0.1,Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,0,1950.0,Curly Armstrong,G-F,31.0,FTW,63.0,,,,...,0.705,,,,176.0,,,,217.0,458.0
1,1,1950.0,Cliff Barker,SG,29.0,INO,49.0,,,,...,0.708,,,,109.0,,,,99.0,279.0
2,2,1950.0,Leo Barnhorst,SF,25.0,CHS,67.0,,,,...,0.698,,,,140.0,,,,192.0,438.0
3,3,1950.0,Ed Bartels,F,24.0,TOT,15.0,,,,...,0.559,,,,20.0,,,,29.0,63.0
4,4,1950.0,Ed Bartels,F,24.0,DNN,13.0,,,,...,0.548,,,,20.0,,,,27.0,59.0


In [15]:
# Remove 'Unnamed: 0' (it just repeats the row number, as the preview above
# shows) and 'Year'.
# The result is reassigned instead of using inplace=True, and
# errors='ignore' is passed, so this cell is idempotent: re-running it after
# the columns are already gone no longer raises a KeyError.
player_stats = player_stats.drop(columns=['Unnamed: 0', 'Year'], errors='ignore')

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the differing counts already hint at missing values.
player_stats.describe()

Unnamed: 0,Age,G,GS,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
count,24616.0,24624.0,18233.0,24138.0,24101.0,24538.0,18839.0,24525.0,20792.0,20792.0,...,23766.0,20797.0,20797.0,24312.0,24624.0,20797.0,20797.0,19645.0,24624.0,24624.0
mean,26.664,50.837,23.593,1209.72,12.479,0.493,0.159,0.325,6.182,13.709,...,0.719,62.189,147.199,224.637,114.853,39.897,24.47,73.94,116.339,510.116
std,3.842,26.496,28.632,941.147,6.039,0.094,0.187,0.219,4.873,6.636,...,0.142,67.325,145.922,228.19,135.864,38.713,36.935,67.714,84.792,492.923
min,18.0,1.0,0.0,0.0,-90.6,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,24.0,27.0,0.0,340.0,9.8,0.458,0.005,0.208,2.6,8.8,...,0.657,12.0,33.0,51.0,19.0,9.0,3.0,18.0,39.0,106.0
50%,26.0,58.0,8.0,1053.0,12.7,0.506,0.064,0.296,5.4,12.7,...,0.743,38.0,106.0,159.0,68.0,29.0,11.0,55.0,109.0,364.0
75%,29.0,75.0,45.0,1971.0,15.6,0.544,0.288,0.4,9.0,18.1,...,0.808,91.0,212.0,322.0,160.0,60.0,29.0,112.0,182.0,778.0
max,44.0,88.0,83.0,3882.0,129.1,1.136,1.0,6.0,100.0,100.0,...,1.0,587.0,1111.0,2149.0,1164.0,301.0,456.0,464.0,386.0,4029.0


In [17]:
# Frame dimensions as (rows, columns), taken after the earlier column drop.
player_stats.shape

(24691, 51)

In [18]:
# Per-column dtypes: shows which columns are text (object) and which are
# numeric (float64).
player_stats.dtypes

Player     object
Pos        object
Age       float64
Tm         object
G         float64
GS        float64
MP        float64
PER       float64
TS%       float64
3PAr      float64
FTr       float64
ORB%      float64
DRB%      float64
TRB%      float64
AST%      float64
STL%      float64
BLK%      float64
TOV%      float64
USG%      float64
blanl     float64
OWS       float64
DWS       float64
WS        float64
WS/48     float64
blank2    float64
OBPM      float64
DBPM      float64
BPM       float64
VORP      float64
FG        float64
FGA       float64
FG%       float64
3P        float64
3PA       float64
3P%       float64
2P        float64
2PA       float64
2P%       float64
eFG%      float64
FT        float64
FTA       float64
FT%       float64
ORB       float64
DRB       float64
TRB       float64
AST       float64
STL       float64
BLK       float64
TOV       float64
PF        float64
PTS       float64
dtype: object

In [19]:
# Tally the missing (NaN) entries in each column and print the per-column
# totals.
missing_values_count = player_stats.isna().sum()
print(missing_values_count)

Player       67
Pos          67
Age          75
Tm           67
G            67
GS         6458
MP          553
PER         590
TS%         153
3PAr       5852
FTr         166
ORB%       3899
DRB%       3899
TRB%       3120
AST%       2136
STL%       3899
BLK%       3899
TOV%       5109
USG%       5051
blanl     24691
OWS         106
DWS         106
WS          106
WS/48       590
blank2    24691
OBPM       3894
DBPM       3894
BPM        3894
VORP       3894
FG           67
FGA          67
FG%         166
3P         5764
3PA        5764
3P%        9275
2P           67
2PA          67
2P%         195
eFG%        166
FT           67
FTA          67
FT%         925
ORB        3894
DRB        3894
TRB         379
AST          67
STL        3894
BLK        3894
TOV        5046
PF           67
PTS          67
dtype: int64
