In [1]:
# Import library
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder, playbyplayv2
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import requests, datetime, time, sqlalchemy
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
# data type for each column
types = {'opposite': 'int',
 'daysdiff': 'int',
 'season_id': 'str',
 'team_id': 'int',
 'team_abbreviation': 'str',
 'team_name': 'str',
 'game_id': 'str',
 'game_date': 'str',
 'matchup': 'str',
 'wl': 'str',
 'min': 'int',
 'pts': 'int',
 'fgm': 'int',
 'fga': 'int',
 'fg_pct': 'float',
 'fg3m': 'int',
 'fg3a': 'float',
 'fg3_pct': 'float',
 'ftm': 'int',
 'fta': 'int',
 'ft_pct': 'float',
 'oreb': 'float',
 'dreb': 'float',
 'reb': 'float',
 'ast': 'int',
 'stl': 'float',
 'blk': 'int',
 'tov': 'int',
 'pf': 'int',
 'pts_oppos': 'int',
 'fgm_oppos': 'int',
 'fga_oppos': 'int',
 'fg_pct_oppos': 'float',
 'fg3m_oppos': 'int',
 'fg3a_oppos': 'float',
 'fg3_pct_oppos': 'float',
 'ftm_oppos': 'int',
 'fta_oppos': 'int',
 'ft_pct_oppos': 'float',
 'oreb_oppos': 'float',
 'dreb_oppos': 'float',
 'reb_oppos': 'float',
 'ast_oppos': 'int',
 'stl_oppos': 'float',
 'blk_oppos': 'int',
 'tov_oppos': 'int',
 'pf_oppos': 'int',
 'days': 'int',
 'hometeam': 'str',
 '1diff': 'int',
 '2diff': 'int',
 'awayteam': 'str'}

## Update game history 

In [3]:
# connect to local database
engine = sqlalchemy.create_engine('postgresql://postgres:password@localhost:5432/NBA')

In [4]:
# get the game results today
today = datetime.date.today() - datetime.timedelta(days=1)
l = str(today).split('-')
today = l[1]+"/"+l[2]+"/"+l[0]
df = leaguegamefinder.LeagueGameFinder(date_from_nullable=today).get_data_frames()[0]

In [5]:
# Get the colunm names for the home team to be PTS_home
c = df.columns.tolist()[9:-1]
l = []
for i in range(len(c)):
    l.append(c[i] + '_oppos')

# away team abbreviation
away = df[df['MATCHUP'].str.contains('@')].reset_index(drop=True)

# home team abbreviation
home = df[df['MATCHUP'].str.contains('vs.')]

# convert a home team row and an away team row to 1 away and home team row
new = pd.DataFrame()
for i in tqdm(range(away.shape[0])):
    tmp = home[home['GAME_ID']==away.iloc[i,:]['GAME_ID']].iloc[:,9:-1].reset_index(drop=True)
    tmp.columns = l
    new = pd.concat([new, pd.concat([away.iloc[i:i+1,:].reset_index(drop=True), tmp],axis=1)])

100%|█████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 130.35it/s]


In [6]:
# drop column and fill averages with 0
new.drop(columns=['PLUS_MINUS'],inplace=True)
new = new.fillna(0)

In [7]:
# make column names lowercase
cols = [item.lower() for item in new.columns.to_list()]

In [8]:
new.columns = cols

In [15]:
# add them to the database
new.reset_index(drop=True).to_sql('raw',engine,if_exists='append',index=False)

## Update training data

In [10]:
# get home team abbreviation for todays games
new['hometeam'] = new['matchup'].str.extract(r'\w* @ (\w*)')

In [11]:
# get all past game data
games = pd.read_sql_query("select * from raw", con=engine)

In [12]:
# convert date to number of days after the first game
def convert_days(date):
    d = pd.Timestamp(1983,10,28)
    return (date - d).days

# compute
games['days'] = pd.to_datetime(games['game_date']).apply(convert_days)

# get home team abbreviation for past games
games['hometeam'] = games['matchup'].str.extract(r'\w* @ (\w*)')

In [17]:
# Find the last time before today both teams faced each other
z = 0
for i in range (new.shape[0]):
    z += 1
    home = new.iloc[i]['hometeam']
    away = new.iloc[i]['team_abbreviation']
    one = games[(games['team_abbreviation']==home) & (games['hometeam'] == away)].reset_index(drop=True)
    two = games[(games['team_abbreviation']==away) & (games['hometeam'] == home)].reset_index(drop=True)
    tmp = pd.concat([one,two]).sort_values('days').iloc[-2:,:]
    
    d = {}
    if tmp.shape[0] > 1:
        if tmp.iloc[0]['team_abbreviation'] != tmp.iloc[1]['team_abbreviation']:
            d['opposite'] = 1
        else:
            d['opposite'] = 0
        d['daysdiff'] = tmp.iloc[1]['days']-tmp.iloc[0]['days']
        tmp1 = pd.Series(d).append(tmp.iloc[0])
        d = {}
        
        # compute score difference
        d['1diff'] = tmp.iloc[0]['pts'] - tmp.iloc[0]['pts_oppos']
        d['2diff'] = tmp.iloc[1]['pts'] - tmp.iloc[1]['pts_oppos']
        d['awayteam'] = tmp.iloc[0]['team_abbreviation']
        tmp2 = tmp1.append(pd.Series(d))
        
        # save to database
        tmp2.to_frame().transpose().astype(types).to_sql('train',con=engine,if_exists='append',index=False)

In [23]:
# Find the last time before today both teams faced each other
z = 0
for i in range (new.shape[0]):
    z += 1
    home = new.iloc[i]['hometeam']
    away = new.iloc[i]['team_abbreviation']
    one = games[(games['team_abbreviation']==home) & (games['hometeam'] == away)].reset_index(drop=True)
    two = games[(games['team_abbreviation']==away) & (games['hometeam'] == home)].reset_index(drop=True)
    tmp = pd.concat([one,two]).sort_values('days').iloc[-2:,:]
    
    d = {}
    if tmp.shape[0] > 1:
        if tmp.iloc[0]['team_abbreviation'] != tmp.iloc[1]['team_abbreviation']:
            d['opposite'] = 1
        else:
            d['opposite'] = 0
        d['daysdiff'] = tmp.iloc[1]['days']-tmp.iloc[0]['days']
        tmp1 = pd.Series(d).append(tmp.iloc[0])
        d = {}
        
        # compute score total
        d['1diff'] = tmp.iloc[0]['pts'] + tmp.iloc[0]['pts_oppos']
        d['2diff'] = tmp.iloc[1]['pts'] + tmp.iloc[1]['pts_oppos']
        d['awayteam'] = tmp.iloc[0]['team_abbreviation']
        tmp2 = tmp1.append(pd.Series(d))
        
        # save to database
        tmp2.to_frame().transpose().astype(types).to_sql('train_total',con=engine,if_exists='append',index=False)

## Update Play-by-Play Data

In [21]:
# function to convert score string to score difference
def calc_scorediff(x):
    if x != None:
        return eval(x)

In [22]:
# get play-by-play data for the games today
for game in new['game_id']:
    play = playbyplayv2.PlayByPlayV2(game).get_data_frames()[0]
    if play.shape[0] != 0:
        score = play['SCORE'].apply(calc_scorediff).fillna(method='ffill').fillna(0)

        # Record the last index of 0:00 of each quarter
        zeroindex = 0
        ends = []
        l = play['PCTIMESTRING'].to_list()
        for i in range (len(l)):
            if l[i] == '0:00':
                zeroindex = i
            else:
                if zeroindex != 0:
                    ends.append(zeroindex)
                    zeroindex=0
        ends.append(i)
        if len(ends) < 4:
            continue
        
        # 4 quarters
        q1 = [(datetime.datetime.strptime(item, "%M:%S") + datetime.timedelta(minutes=36)) for item in l[:ends[0]+1]]
        q2 = [(datetime.datetime.strptime(item, "%M:%S") + datetime.timedelta(minutes=24)) for item in l[ends[0]+1:ends[1]+1]]
        q3 = [(datetime.datetime.strptime(item, "%M:%S") + datetime.timedelta(minutes=12)) for item in l[ends[1]+1:ends[2]+1]]
        q4 = [datetime.datetime.strptime(item, "%M:%S") for item in l[ends[2]+1:ends[3]+1]]
        times = [(datetime.datetime(1900,1,1,0,48) - event).total_seconds() for event in q1+q2+q3+q4]

        timedf = pd.DataFrame([times,score]).transpose()
        tdata = []
        try:
            for i in range (360):
                tdata.append(timedf[timedf[0] <= 8*(i+1)].iloc[-1][1])
        except IndexError:
            continue
        tdf = pd.DataFrame(tdata).transpose()
        tdf.columns = ["t"+str(col) for col in tdf.columns]
        tdf.insert(0,'game_id',game)
        
        # overtime indicator
        if len(ends) > 4:
            tdf.insert(1,'overtime', 1)
        else:
            tdf.insert(1,'overtime', 0)
            
        # save to database
        tdf.to_sql('playbyplay', con=engine, if_exists='append', index=False)
        time.sleep(3)