## Defensive Dashboard

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.ticker as mtick
import sqlite3
import seaborn as sns
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests   
import shutil      
import datetime
from scipy.stats import norm
import os
import winsound

home_folder = 'C:\\Users\\Travis\\OneDrive\\Data Science\\Personal_Projects\\Sports\\NBA_Prediction_V3_1'
os.chdir(home_folder)

In [2]:
if os.path.isdir('data/player/defensive_dashboard') is False:
    os.mkdir('data/player/defensive_dashboard')

In [3]:
def replace_name_values2(filename):
        # replace values with dashes for compatibility
    filename = filename.replace('%','_')
    filename = filename.replace('=','_')
    filename = filename.replace('?','_')
    filename = filename.replace('&','_')
    filename = filename.replace('20Season_','')
    filename = filename.replace('_20Season','')
    filename = filename.replace('SeasonType_','')
    filename = filename.replace('sort_gdate_dir_-1_','')
    filename = filename.replace('SeasonYear_','')
    return filename

In [4]:
def append_the_data(folder, data_prefix, filename_selector):
    # Appending data together via folder and/or file name

    path = folder
    p = os.listdir(path)
    pf = pd.DataFrame(p)


    # filter for files that contain the filename_selector
    pf_reg = pf.loc[pf[0].astype(str).str.contains(filename_selector)] 

    appended_data = []
    for file in pf_reg[0]:
        data = pd.read_csv(folder + '/' + file)
        # if "Season" a column, drop it
        if 'Season' in data.columns:
            data = data.drop(columns = ['Season'])
        
        data['season'] = file[(file.find('20')):(file.find('20'))+4]
        data['season_type'] = np.where('Regular' in file, 'Regular', 'Playoffs')
        # add prefix to columns
        data = data.add_prefix(data_prefix)
        data.columns = data.columns.str.lower()
        appended_data.append(data)
    
    appended_data = pd.concat(appended_data)
    return appended_data

In [5]:
def trans_urls(url):
    new_url = str(url)[34:].replace('/', '_')
    filename = replace_name_values2(new_url)
    filename = filename.replace('SeasonYear_', '')
    return filename

In [6]:
def grab_player_data2(url_list, file_folder):    
        
        # Scrape Season-Level player data from the url_list

        i = 0
        for u in url_list:
                
                driver.get(u)
                time.sleep(2)

                # if the page does not load, go to the next in the list
                try:
                        xpath = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[3]/div/label/div/select/option[1]'
                        elem = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, xpath)))
                except:
                        print(f'{u} did not load. Moving to next url.')
                        continue

                # click "all pages"
                xpath_all = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[3]/div/label/div/select/option[1]' 
                elem = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, xpath_all)))
                
                driver.find_element(by=By.XPATH, value=xpath_all).click()
                src = driver.page_source
                parser = BeautifulSoup(src, "lxml")
                table = parser.find("table", attrs = {"class":"Crom_table__p1iZz"})
                headers = table.findAll('th')
                headerlist = [h.text.strip() for h in headers[0:]] 
                row_names = table.findAll('a')                             # find rows
                row_list = [b.text.strip() for b in row_names[0:]] 
                rows = table.findAll('tr')[0:]
                player_stats = [[td.getText().strip() for td in rows[i].findAll('td')[0:]] for i in range(len(rows))]
                tot_cols = len(player_stats[1])                           #set the length to ignore hidden columns
                headerlist = headerlist[:tot_cols]   
                stats = pd.DataFrame(player_stats, columns = headerlist)

                # assign filename
                filename = file_folder + str(u[34:]).replace('/', '_') + '.csv'
                filename = replace_name_values2(filename)
                pd.DataFrame.to_csv(stats, filename)
                i += 1
                lu = len(url_list)
                # close driver
                print(f'{filename} Completed Successfully! {i} / {lu} Complete!')

        winsound.Beep(523, 500)

In [7]:
def get_urls():

    # This one is weird... https://www.nba.com/stats/players/defense-dash-overall/?SeasonYear=2018-19&SeasonType=Regular%20Season&Season=2018-19
    
    years =['2021-22', '2020-21', '2019-20', '2018-19', '2017-18', '2016-17', '2015-16','2014-15', '2013-14']
    types = ['defense-dash-overall', 'defense-dash-3pt', 'defense-dash-2pt', 'defense-dash-lt6',
            'defense-dash-lt10', 'defense-dash-gt15' ]
    season_types = ['Playoffs', 'Regular%20Season']

    def_dash_urls = []

    for year in years:
        for typ in types:
            for s_types in season_types:
                url = 'https://www.nba.com/stats/players/'+ typ +'/?SeasonYear=' + year + '&SeasonType=' + s_types + '&Season=' + year
                def_dash_urls.append(str(url))

    to_download = pd.DataFrame(def_dash_urls, columns = ['urls'])
        # create new column with filename
    to_download['filename'] = to_download.apply(lambda row: trans_urls(row['urls']), axis=1)
    return to_download

In [8]:
to_download = get_urls()

In [9]:
# TODO: This is not working

# # Check to see if files exist already
# folder1 = 'data/player/defensive_dashboard/regular_season/'
# folder2 = 'data/player/defensive_dashboard/playoffs/'
# file_list1 = os.listdir(folder1)
# file_list2 = os.listdir(folder2)
# file_list = file_list1 + file_list2
# # replace .csv in file_list
# file_list = [x.replace('.csv', '') for x in file_list]

# # get list of files not downloaded
# to_download = to_download.loc[~to_download['filename'].isin(file_list)]
# to_download = to_download['urls'].tolist()

# # download files
# if len(to_download) > 0:
#     driver = webdriver.Chrome()
#     grab_player_data2(to_download, 'data/player/defensive_dashboard/')
#     driver.close()
# else:
#     print('All files already downloaded.')


data/player/defensive_dashboard/defense-dash-overall__2021-22_Regular_Season_2021-22.csv Completed Successfully! 1 / 54 Complete!
data/player/defensive_dashboard/defense-dash-3pt__2021-22_Regular_Season_2021-22.csv Completed Successfully! 2 / 54 Complete!
https://www.nba.com/stats/players/defense-dash-2pt/?SeasonYear=2021-22&SeasonType=Regular%20Season&Season=2021-22 did not load. Moving to next url.
data/player/defensive_dashboard/defense-dash-lt6__2021-22_Regular_Season_2021-22.csv Completed Successfully! 3 / 54 Complete!
https://www.nba.com/stats/players/defense-dash-lt10/?SeasonYear=2021-22&SeasonType=Regular%20Season&Season=2021-22 did not load. Moving to next url.


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=108.0.5359.125)


### Update This Year

In [10]:

# get list of this years urls

types = ['defense-dash-overall', 'defense-dash-3pt', 'defense-dash-2pt', 'defense-dash-lt6',
            'defense-dash-lt10', 'defense-dash-gt15' ]

this_year = []

for t in types: 
    url = 'https://www.nba.com/stats/players/'+ t +'/?SeasonYear=2022-23&SeasonType=Regular%20Season&Season=2022-23'
    this_year.append(url)


In [12]:
driver = webdriver.Chrome()
grab_player_data2(this_year, 'data/player/defensive_dashboard/')

data/player/defensive_dashboard/defense-dash-overall__2022-23_Regular_Season_2022-23.csv Completed Successfully! 1 / 6 Complete!
data/player/defensive_dashboard/defense-dash-3pt__2022-23_Regular_Season_2022-23.csv Completed Successfully! 2 / 6 Complete!
data/player/defensive_dashboard/defense-dash-2pt__2022-23_Regular_Season_2022-23.csv Completed Successfully! 3 / 6 Complete!
data/player/defensive_dashboard/defense-dash-lt6__2022-23_Regular_Season_2022-23.csv Completed Successfully! 4 / 6 Complete!
data/player/defensive_dashboard/defense-dash-lt10__2022-23_Regular_Season_2022-23.csv Completed Successfully! 5 / 6 Complete!
data/player/defensive_dashboard/defense-dash-gt15__2022-23_Regular_Season_2022-23.csv Completed Successfully! 6 / 6 Complete!


### Folders

In [13]:
# move files to correct folders
for file in os.listdir('data/player/defensive_dashboard'):
    if 'Regular' in file:
        shutil.move('data/player/defensive_dashboard/' + file, 'data/player/defensive_dashboard/regular_season/')
    if 'Playoffs' in file:
        shutil.move('data/player/defensive_dashboard/' + file, 'data/player/defensive_dashboard/playoffs/')


In [14]:
to_download = get_urls()

### Append Together

In [15]:
# append the files by subcat
dd_2_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_2pt_', '2pt')
dd_2_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_2pt_', '2pt')
dd_2_tot = pd.concat([dd_2_reg, dd_2_play])
dd_2_tot



Unnamed: 0,def_dash_2pt_unnamed: 0,def_dash_2pt_player,def_dash_2pt_team,def_dash_2pt_age,def_dash_2pt_position,def_dash_2pt_gp,def_dash_2pt_g,def_dash_2pt_freq%,def_dash_2pt_dfgm,def_dash_2pt_dfga,def_dash_2pt_dfg%,def_dash_2pt_fg%,def_dash_2pt_diff%,def_dash_2pt_season,def_dash_2pt_season_type
0,0,,,,,,,,,,,,,2022,Regular
1,1,Brook Lopez,MIL,34.0,C,32.0,32.0,76.0,8.2,16.6,49.4,55.7,-6.2,2022,Regular
2,2,Joel Embiid,PHI,28.0,C-F,24.0,24.0,79.6,8.2,16.4,50.1,54.6,-4.5,2022,Regular
3,3,Domantas Sabonis,SAC,26.0,F-C,31.0,31.0,84.9,8.3,15.2,54.8,55.6,-0.8,2022,Regular
4,4,Nikola Jokic,DEN,27.0,C,29.0,29.0,74.4,8.2,15.1,54.3,56.6,-2.2,2022,Regular
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,207,Kevin Knox II,ATL,22.0,F,2.0,1.0,33.3,0.5,0.5,100.0,55.0,45.0,2021,Playoffs
208,208,Luca Vildoza,MIL,26.0,G,2.0,1.0,33.3,0.0,0.5,0.0,16.7,-16.7,2021,Playoffs
209,209,Zeke Nnaji,DEN,21.0,F-C,2.0,1.0,50.0,0.5,0.5,100.0,53.7,46.3,2021,Playoffs
210,210,Juwan Morgan,BOS,25.0,F,5.0,2.0,33.3,0.2,0.4,50.0,54.8,-4.8,2021,Playoffs


In [16]:
dd_3_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_3pt_', '3pt')
dd_3_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_3pt_', '3pt')
dd_3_tot = pd.concat([dd_3_reg, dd_3_play])

lt_6_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_lt6_', 'lt6')
lt_6_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_lt6_', 'lt6')
lt_6_tot = pd.concat([lt_6_reg, lt_6_play])

lt_10_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_lt10_', 'lt10')
lt_10_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_lt10_', 'lt10')
lt_10_tot = pd.concat([lt_10_reg, lt_10_play])

gt_15_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_gt15_', 'gt15')
gt_15_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_gt15_', 'gt15')
gt_15_tot = pd.concat([gt_15_reg, gt_15_play])

dd_overall_reg = append_the_data('data/player/defensive_dashboard/regular_season/', 'def_dash_overall_', 'overall')
dd_overall_play = append_the_data('data/player/defensive_dashboard/playoffs/', 'def_dash_overall_', 'overall')
dd_overall_tot = pd.concat([dd_overall_reg, dd_overall_play])

In [17]:
# check tot dataframe sizes
print(dd_2_tot.shape, dd_3_tot.shape, lt_6_tot.shape, lt_10_tot.shape, gt_15_tot.shape, dd_overall_tot.shape)


(2348, 15) (2878, 15) (2851, 15) (2298, 15) (2331, 15) (2981, 15)


In [18]:
# start with overall
all_def_dash = pd.merge(dd_overall_tot, dd_2_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_2pt_player', 'def_dash_2pt_team', 'def_dash_2pt_season', 'def_dash_2pt_season_type'],
                                how = 'left')

all_def_dash2 = pd.merge(all_def_dash, dd_3_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_3pt_player', 'def_dash_3pt_team', 'def_dash_3pt_season', 'def_dash_3pt_season_type'],
                                how = 'left')

all_def_dash3 = pd.merge(all_def_dash2, lt_6_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_lt6_player', 'def_dash_lt6_team', 'def_dash_lt6_season', 'def_dash_lt6_season_type'],
                                how = 'left')

all_def_dash4 = pd.merge(all_def_dash3, lt_10_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_lt10_player', 'def_dash_lt10_team', 'def_dash_lt10_season', 'def_dash_lt10_season_type'],
                                how = 'left')

all_def_dash5 = pd.merge(all_def_dash4, gt_15_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_gt15_player', 'def_dash_gt15_team', 'def_dash_gt15_season', 'def_dash_gt15_season_type'],
                                how = 'left')


In [19]:
all_def_dash5 = pd.merge(all_def_dash4, gt_15_tot,
                                left_on = ['def_dash_overall_player', 'def_dash_overall_team' ,'def_dash_overall_season', 'def_dash_overall_season_type'],
                                right_on = ['def_dash_gt15_player', 'def_dash_gt15_team', 'def_dash_gt15_season', 'def_dash_gt15_season_type'],
                                how = 'left')

print(f' init_size = {all_def_dash5.shape}')

 init_size = (2981, 90)


In [20]:
all_def_dash5.to_csv('data/player/aggregates/All_Defensive_Dashboard.csv', index = False)