In [1]:
import datetime
import gspread
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from urllib.request import urlopen
import time
from time import sleep
from basketball_reference_scraper.pbp import get_pbp

pd.options.mode.chained_assignment = None  # default='warn'


In [3]:
# read data
nba_results_22_23 = pd.read_csv('22_23_aggregated_results.csv')
nba_results_22_23['DATE'] = pd.to_datetime(nba_results_22_23['DATE'])

In [4]:
# calculate 2022-23 LT% and TIE%
nba_results_22_23['AWAY_LT'] = nba_results_22_23['HOME_WT']
nba_results_22_23['HOME_LT'] = nba_results_22_23['AWAY_WT']
nba_results_22_23['HOME_LT'] = nba_results_22_23['AWAY_WT']
nba_results_22_23['TIE_PC'] = 1 - (nba_results_22_23['AWAY_WT'] + nba_results_22_23['HOME_WT'])

In [5]:
# function to calculate team average for each metric
def calculate_metric(df, metric):
    # set metric columns
    if metric in ('WT','LT'):
        away_col = 'AWAY_' + metric
        home_col = 'HOME_' + metric
    elif metric == 'TIE_PC':
        away_col = metric
        home_col = metric
    
    # reformat into single column
    nba_results_22_23_away = df[['DATE','AWAY_TEAM', away_col]].rename(columns={'AWAY_TEAM':'TEAM',away_col:metric})
    nba_results_22_23_home = df[['DATE','HOME_TEAM', home_col]].rename(columns={'HOME_TEAM':'TEAM',home_col:metric})
        
        

    # concatenate
    nba_results_22_23_reformat = pd.concat([nba_results_22_23_away, nba_results_22_23_home]).reset_index(drop=True)
    
    # find team averages
    nba_results_22_23_agg = nba_results_22_23_reformat.groupby(['TEAM']).mean().reset_index()
    return nba_results_22_23_agg

In [6]:
# create datatset of wt/lt/tie%
wt_results = calculate_metric(nba_results_22_23, 'WT')
lt_results = calculate_metric(nba_results_22_23, 'LT')
tie_results = calculate_metric(nba_results_22_23, 'TIE_PC')

nba_results_22_23_agg = pd.merge(wt_results, lt_results, how='inner', on='TEAM')
nba_results_22_23_agg = pd.merge(nba_results_22_23_agg, tie_results, how='inner', on='TEAM')
display((nba_results_22_23_agg['WT'] + nba_results_22_23_agg['LT'] + nba_results_22_23_agg['TIE_PC']).min())
display((nba_results_22_23_agg['WT'] + nba_results_22_23_agg['LT'] + nba_results_22_23_agg['TIE_PC']).max())

0.9999999999999999

1.0

In [7]:
## Add Supplemental Info
supp_df = pd.read_csv('22_23_supplemental_info.csv')
supp_df['EXPECTED_WP'] = supp_df['EXPECTED_WIN'] / 82

In [8]:
## Add initial deltas
nba_results_22_23_agg_fin = pd.merge(nba_results_22_23_agg, supp_df, how='inner', on='TEAM')
nba_results_22_23_agg_fin['WT_v_WP'] = nba_results_22_23_agg_fin['WT'] - nba_results_22_23_agg_fin['WP']
nba_results_22_23_agg_fin['WT_v_EXP_WP'] = nba_results_22_23_agg_fin['WT'] - nba_results_22_23_agg_fin['EXPECTED_WP']

In [9]:
## final df
nba_results_22_23_agg_fin.sort_values('TEAM')

Unnamed: 0,TEAM,WT,LT,TIE_PC,WP,PT_DIFF,EXPECTED_WIN,EXPECTED_WP,WT_v_WP,WT_v_EXP_WP
0,ATL,0.512354,0.440656,0.04699,0.5,0.1,41.2,0.502439,0.012354,0.009915
1,BOS,0.627872,0.316295,0.055833,0.695,6.9,57.2,0.697561,-0.067128,-0.069689
2,BRK,0.488815,0.456958,0.054227,0.549,1.0,43.5,0.530488,-0.060185,-0.041673
3,CHI,0.466179,0.475931,0.05789,0.488,1.3,44.2,0.539024,-0.021821,-0.072845
4,CHO,0.336131,0.612646,0.051223,0.329,-6.2,25.9,0.315854,0.007131,0.020277
5,CLE,0.532688,0.415923,0.05139,0.622,5.8,55.0,0.670732,-0.089312,-0.138044
6,DAL,0.456318,0.483746,0.059937,0.463,0.3,41.7,0.508537,-0.006682,-0.052219
7,DEN,0.547795,0.398861,0.053344,0.646,3.8,50.1,0.610976,-0.098205,-0.063181
8,DET,0.268981,0.678496,0.052523,0.207,-8.6,21.2,0.258537,0.061981,0.010445
9,GSW,0.477099,0.470285,0.052616,0.537,2.4,46.9,0.571951,-0.059901,-0.094852


In [10]:
nba_results_22_23_agg_fin.to_csv('22_23_wp_final_results.csv')

In [68]:
html = urlopen('https://cleaningtheglass.com/stats/league/summary')
soup = BeautifulSoup(html, 'html.parser')
table = soup.find("table")
df_ctg = pd.read_html(str(table))[0]
df_ctg.columns = df_ctg.columns.get_level_values(1)
df_ctg = df_ctg.iloc[: , 1:]
df_ctg = df_ctg.iloc[: , :7]
df_ctg_1 = df_ctg['Team']
df_ctg_2 = df_ctg.iloc[: , 2]
df_ctg_3 = df_ctg.iloc[: , 5:]
df_ctg = pd.concat([df_ctg_1, df_ctg_2, df_ctg_3], axis=1)

  df_ctg = pd.read_html(str(table))[0]


In [69]:
df_ctg

Unnamed: 0,Team,Point Diff,Win%,Exp W82
0,LA Clippers,20.0,66.7%,75.7
1,Orlando,15.2,100.0%,72.2
2,Denver,14.4,100.0%,70.0
3,Golden State,11.6,75.0%,66.7
4,Boston,10.7,100.0%,64.9
5,Philadelphia,9.7,66.7%,62.8
6,Indiana,9.6,66.7%,62.2
7,Phoenix,8.9,66.7%,62.9
8,Dallas,8.8,100.0%,60.8
9,Atlanta,6.3,50.0%,55.8


In [2]:
# reduce to yesterday
nba_schedule_df = pd.read_csv('utils/wt_support/23_24_nba_schedule.csv')
nba_schedule_df['DATE'] = pd.to_datetime(nba_schedule_df['DATE'])
nba_schedule_df_retro = nba_schedule_df[
    nba_schedule_df.DATE == (datetime.date.today().strftime('%Y-%m-%d') - datetime.timedelta(1))
].reset_index(drop=True)

nba_schedule_df_retro

  nba_schedule_df['DATE'] = pd.to_datetime(nba_schedule_df['DATE'])


TypeError: unsupported operand type(s) for -: 'str' and 'datetime.timedelta'

In [4]:
# datetime.date.today().strftime('%Y-%m-%d') - datetime.timedelta(1)
(datetime.date.today() - datetime.timedelta(1)).strftime('%Y-%m-%d')

'2023-10-30'