In [1]:
import pandas as pd
from nba_py import team
from nba_py import game
from nba_py import Scoreboard
from datetime import date, timedelta



### Part 1- Data Wrangling

1) Downloading data from NBA.stats for the season 2018-2019 (16 Oct to 10th April) <br>
2) Coverting data into Pandas DataFrame for futher processing <br>

In [19]:
# Date Generator - Date from 16th Oct 2018 to 10th April 2019

date_list = []
start_date = date(2018, 10, 16)
end_date = date(2019, 4, 10)
delta = timedelta(days=1)
while start_date <= end_date:
    #print (start_date.strftime("%Y-%m-%d"))
    date_list.append(start_date)
    start_date += delta

In [None]:
# Converting data into Pandas DataFrame
df = pd.DataFrame()
for i in range(len(date_list)):
    date_month = date_list[i].month
    date_day   = date_list[i].day
    date_year  = date_list[i].year
    data = Scoreboard(month=date_month, day=date_day, year=date_year, league_id='00', offset=0)
    df_temp = data.line_score()
    df = df.append(df_temp)

In [7]:
# Check for Missing Data
len(df)

124

### Part 2 : Splitting Data into 4 Parts
1) First Half - QTR1 + QTR2 <br>
2) Second Half (including Overtime) = Total - First Half <br>
3) Full Game (Already in PTS) <br>
4) Regulation Time = QTR1 + QTR2 + QTR3 + QTR4 <br>

In [8]:
df['PTS_FHALF'] = df['PTS_QTR1'] + df['PTS_QTR2']
df['PTS_SHALF'] = df['PTS'] - df['PTS_FHALF']
df['REG_PTS']   = df['PTS_QTR1'] + df['PTS_QTR2'] + df['PTS_QTR3'] + df['PTS_QTR4']


In [9]:
df.to_csv('2018-2019.csv')
df.head()

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY_NAME,TEAM_WINS_LOSSES,PTS_QTR1,PTS_QTR2,PTS_QTR3,...,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TOV,PTS_FHALF,PTS_SHALF,REG_PTS
0,2018-10-16T00:00:00,1,21800001,1610612755,PHI,Philadelphia,0-1,21,21,24,...,87,0.391,0.609,0.192,18,47,16,42,45,87
1,2018-10-16T00:00:00,1,21800001,1610612738,BOS,Boston,1-0,21,26,30,...,105,0.433,0.714,0.297,21,55,14,47,58,105
2,2018-10-16T00:00:00,2,21800002,1610612760,OKC,Oklahoma City,0-1,23,24,32,...,100,0.363,0.649,0.27,21,45,14,47,53,100
3,2018-10-16T00:00:00,2,21800002,1610612744,GSW,Golden State,1-0,31,26,26,...,108,0.442,0.944,0.269,28,58,21,57,51,108
0,2018-10-17T00:00:00,1,21800003,1610612749,MIL,Milwaukee,1-0,36,31,26,...,113,0.494,0.75,0.412,26,57,21,67,46,113


### Part 3: Find the Factors of Game
1) For finding the factors we need broader BoxScore (not just the game details above)
2) We are using Box Score from NBA.stats


In [14]:
# Reading Data Back from CSV File (Multiple Reuqest is getting rejected)
#df = pd.read_csv('2018-2019.csv')
# Finding all unique game_ids
game_ids = df['GAME_ID'].unique()
#print(game_ids)

In [15]:
# Getting Factors data for all the games
df_factor = pd.DataFrame()
for i in range(len(game_ids)):
    data = game.BoxscoreFourFactors(game_id=game_ids[i], season='2018-19', season_type='Regular Season', range_type='0', start_period='0', end_period='0', start_range='0', end_range='0')
    df_temp = data.sql_team_four_factors()
    #print(df_temp)
    df_factor = df_factor.append(df_temp)

In [16]:
df_factor.to_csv('2018-2019_factor.csv')
df_factor.head()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,OREB_PCT,OPP_EFG_PCT,OPP_FTA_RATE,OPP_TOV_PCT,OPP_OREB_PCT
0,21800001,1610612755,76ers,PHI,Philadelphia,240:00,0.42,0.264,0.149,0.105,0.49,0.144,0.141,0.25
1,21800001,1610612738,Celtics,BOS,Boston,240:00,0.49,0.144,0.141,0.214,0.42,0.264,0.149,0.175
0,21800002,1610612760,Thunder,OKC,Oklahoma City,240:00,0.418,0.407,0.141,0.242,0.479,0.189,0.196,0.415
1,21800002,1610612744,Warriors,GSW,Golden State,240:00,0.479,0.189,0.196,0.321,0.418,0.407,0.141,0.348
0,21800003,1610612749,Bucks,MIL,Milwaukee,240:00,0.576,0.235,0.202,0.239,0.533,0.239,0.106,0.193


In [18]:
len()

70

In [17]:
# Finding the QTRs Points Mean and Diff of Each Match from Team
df_mean = df.mean()
print(df_mean)

GAME_SEQUENCE    4.870968e+00
TEAM_ID          1.610613e+09
PTS_QTR1         2.776613e+01
PTS_QTR2         2.912097e+01
PTS_QTR3         2.774194e+01
PTS_QTR4         2.714516e+01
PTS_OT1          6.532258e-01
PTS_OT2          0.000000e+00
PTS_OT3          0.000000e+00
PTS_OT4          0.000000e+00
PTS_OT5          0.000000e+00
PTS_OT6          0.000000e+00
PTS_OT7          0.000000e+00
PTS_OT8          0.000000e+00
PTS_OT9          0.000000e+00
PTS_OT10         0.000000e+00
PTS              1.124274e+02
FG_PCT           4.554274e-01
FT_PCT           7.589355e-01
FG3_PCT          3.562661e-01
AST              2.410484e+01
REB              4.604839e+01
TOV              1.388710e+01
dtype: float64


In [18]:
df_factor = pd.read_csv('2018-2019_factor.csv')
df_mean_factor = df_factor.mean()
print(df_mean_factor)

Unnamed: 0      5.000000e-01
GAME_ID         2.180003e+07
TEAM_ID         1.610613e+09
EFG_PCT         5.175726e-01
FTA_RATE        2.726855e-01
TM_TOV_PCT      1.374758e-01
OREB_PCT        2.059355e-01
OPP_EFG_PCT     5.175726e-01
OPP_FTA_RATE    2.726855e-01
OPP_TOV_PCT     1.374758e-01
OPP_OREB_PCT    2.788065e-01
dtype: float64


In [None]:
# Finding Difference between Mean and QTRs and Factor

