#### Which traditional stat is most important?
* Data Needed: Player name, points, assists, rebounds, FG%, team margin

* Find correlation between winning and an imputed stats (points, assists, rebounds, FG%).
  Scatter Plot

In [1]:
# dependencies
import requests
import json
import os
import pandas as pd
import numpy as np

In [2]:
# consuming data file with years from 2014 and 2018
filepath = os.path.join('winloss_by_teams.csv')

tradStats_df = pd.read_csv(filepath)
tradStats_df.head()

Unnamed: 0,TeamName,League,Season,Division,Franchise,Team,W,L,%
0,NBAWashington Capitols,NBA,(1946-47),Eastern Division,Washington Capitols,WAS,49,11,81.70%
1,NBAGolden State Warriors,NBA,(1946-47),Eastern Division,Golden State Warriors,PHI,35,25,58.30%
2,NBANew York Knicks,NBA,(1946-47),Eastern Division,New York Knicks,NYK,33,27,55.00%
3,NBAProvidence Steam Rollers,NBA,(1946-47),Eastern Division,Providence Steam Rollers,PRO,28,32,46.70%
4,NBABoston Celtics,NBA,(1946-47),Eastern Division,Boston Celtics,BOS,22,38,36.70%


### Data Cleansing and Checks

In [3]:
# check all columns and names
tradStats_df.columns

Index(['TeamName', 'League', 'Season', 'Division', 'Franchise', 'Team', 'W',
       'L', '%'],
      dtype='object')

In [4]:
tradStats_removed = tradStats_df.replace({'NBA':''}, regex=True)
tradStats_removed

Unnamed: 0,TeamName,League,Season,Division,Franchise,Team,W,L,%
0,Washington Capitols,,(1946-47),Eastern Division,Washington Capitols,WAS,49,11,81.70%
1,Golden State Warriors,,(1946-47),Eastern Division,Golden State Warriors,PHI,35,25,58.30%
2,New York Knicks,,(1946-47),Eastern Division,New York Knicks,NYK,33,27,55.00%
3,Providence Steam Rollers,,(1946-47),Eastern Division,Providence Steam Rollers,PRO,28,32,46.70%
4,Boston Celtics,,(1946-47),Eastern Division,Boston Celtics,BOS,22,38,36.70%
...,...,...,...,...,...,...,...,...,...
1539,Houston Rockets,,(2019-20),Southwest Division,Houston Rockets,HOU,44,28,61.10%
1540,Dallas Mavericks,,(2019-20),Southwest Division,Dallas Mavericks,DAL,43,32,57.30%
1541,Memphis Grizzlies,,(2019-20),Southwest Division,Memphis Grizzlies,MEM,34,39,46.60%
1542,San Antonio Spurs,,(2019-20),Southwest Division,San Antonio Spurs,SAS,32,39,45.10%


In [5]:
tradStats_clean = tradStats_removed.loc[(tradStats_removed['Season'] >= '(2014-15)') & (tradStats_removed['Season'] <= '(2018-19)'),:]
tradStats_clean.head()

Unnamed: 0,TeamName,League,Season,Division,Franchise,Team,W,L,%
1364,Toronto Raptors,,(2014-15),Atlantic Division,Toronto Raptors,TOR,49,33,59.80%
1365,Boston Celtics,,(2014-15),Atlantic Division,Boston Celtics,BOS,40,42,48.80%
1366,Brooklyn Nets,,(2014-15),Atlantic Division,Brooklyn Nets,BRO,38,44,46.30%
1367,Philadelphia 76ers,,(2014-15),Atlantic Division,Philadelphia 76ers,PHI,18,64,22.00%
1368,New York Knicks,,(2014-15),Atlantic Division,New York Knicks,NYK,17,65,20.70%


In [6]:
# checking data counts
tradStats_clean.count()

TeamName     150
League       150
Season       150
Division     150
Franchise    150
Team         150
W            150
L            150
%            150
dtype: int64

In [7]:
# check NaN values before replacing it with 0
tradStats_clean.isnull().values.any()

False

In [8]:
tradStats_clean.reset_index(drop=True)

Unnamed: 0,TeamName,League,Season,Division,Franchise,Team,W,L,%
0,Toronto Raptors,,(2014-15),Atlantic Division,Toronto Raptors,TOR,49,33,59.80%
1,Boston Celtics,,(2014-15),Atlantic Division,Boston Celtics,BOS,40,42,48.80%
2,Brooklyn Nets,,(2014-15),Atlantic Division,Brooklyn Nets,BRO,38,44,46.30%
3,Philadelphia 76ers,,(2014-15),Atlantic Division,Philadelphia 76ers,PHI,18,64,22.00%
4,New York Knicks,,(2014-15),Atlantic Division,New York Knicks,NYK,17,65,20.70%
...,...,...,...,...,...,...,...,...,...
145,Houston Rockets,,(2018-19),Southwest Division,Houston Rockets,HOU,53,29,64.60%
146,San Antonio Spurs,,(2018-19),Southwest Division,San Antonio Spurs,SAS,48,34,58.50%
147,Memphis Grizzlies,,(2018-19),Southwest Division,Memphis Grizzlies,MEM,33,49,40.20%
148,New Orleans Pelicans,,(2018-19),Southwest Division,New Orleans Pelicans,NOP,33,49,40.20%


In [9]:
# saving output
#tradStats_df.to_csv("output/traditional_stats_clean2.csv", encoding="utf-8", index=True, header=True)