## Hustle

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.ticker as mtick
import sqlite3
import seaborn as sns
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests   
import shutil      
import datetime
from scipy.stats import norm
import os
import winsound

home_folder = 'C:\\Users\\Travis\\OneDrive\\Data Science\\Personal_Projects\\Sports\\NBA_Prediction_V3_1'
os.chdir(home_folder)


In [2]:
# move the files to the correct folder
for file in os.listdir('data/player/hustle/'):
    if '.csv' in file:
        if 'Playoffs' in file:
            os.rename('data/player/hustle/' + file, 'data/player/hustle/playoffs/' + file)
        else:
            os.rename('data/player/hustle/' + file, 'data/player/hustle/regular_season/' + file)

In [8]:
def append_the_data(folder, data_prefix, filename_selector):
    # Appending data together via folder and/or file name

    path = folder
    p = os.listdir(path)
    pf = pd.DataFrame(p)


    # filter for files that contain the filename_selector
    pf_reg = pf.loc[pf[0].astype(str).str.contains(filename_selector)] 

    appended_data = []
    for file in pf_reg[0]:
        data = pd.read_csv(folder + '/' + file)
        # if "Season" a column, drop it
        if 'Season' in data.columns:
            data = data.drop(columns = ['Season'])
        
        data['season'] = file[(file.find('20')):(file.find('20'))+4]
        data['season_type'] = np.where('Regular' in file, 'Regular', 'Playoffs')
        # add prefix to columns
        data = data.add_prefix(data_prefix)
        data.columns = data.columns.str.lower()
        appended_data.append(data)
    
    appended_data = pd.concat(appended_data)
    return appended_data

In [5]:
def replace_name_values(filename):
        # replace values with dashes for compatibility
    filename = filename.replace('%','_')
    filename = filename.replace('=','_')
    filename = filename.replace('?','_')
    filename = filename.replace('&','_')
    filename = filename.replace('20Season_','')
    filename = filename.replace('20Season','')
    return filename

In [6]:
def grab_player_data(url_list, file_folder):    
        
        # Scrape Season-Level player data from the url_list

        i = 0
        for u in url_list:
                
                driver.get(u)
                time.sleep(2)

                # if the page does not load, go to the next in the list
                try:
                        xpath = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[3]/div/label/div/select/option[1]'
                        elem = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, xpath)))
                except:
                        print(f'{u} did not load. Moving to next url.')
                        continue

                # click "all pages"
                xpath_all = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[3]/div/label/div/select/option[1]' 
                elem = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, xpath_all)))
                
                driver.find_element(by=By.XPATH, value=xpath_all).click()
                src = driver.page_source
                parser = BeautifulSoup(src, "lxml")
                table = parser.find("table", attrs = {"class":"Crom_table__p1iZz"})
                headers = table.findAll('th')
                headerlist = [h.text.strip() for h in headers[0:]] 
                row_names = table.findAll('a')                             # find rows
                row_list = [b.text.strip() for b in row_names[0:]] 
                rows = table.findAll('tr')[0:]
                player_stats = [[td.getText().strip() for td in rows[i].findAll('td')[0:]] for i in range(len(rows))]
                tot_cols = len(player_stats[1])                           #set the length to ignore hidden columns
                headerlist = headerlist[:tot_cols]   
                stats = pd.DataFrame(player_stats, columns = headerlist)

                # assign filename
                filename = file_folder + str(u[34:]).replace('/', '_') + '.csv'
                filename = replace_name_values(filename)
                pd.DataFrame.to_csv(stats, filename)
                i += 1
                lu = len(url_list)
                # close driver
                print(f'{filename} Completed Successfully! {i} / {lu} Complete!')

        winsound.Beep(523, 500)

In [7]:
player_hustle = 'https://www.nba.com/stats/players/hustle/'
hustle_urls = []
years =['2021-22', '2020-21', '2019-20', '2018-19', '2017-18', '2016-17']
season_types = ['Regular%20Season', 'Playoffs']

for year in years:
    for s_types in season_types:
        url = player_hustle + '?Season=' + year + '&SeasonType=' + s_types
        hustle_urls.append(str(url))


### Update This Year

In [None]:
player_hustle = 'https://www.nba.com/stats/players/hustle/?Season=2022-23&SeasonType=Regular%20Season'

In [10]:
driver = webdriver.Chrome()
grab_player_data([player_hustle], 'data/player/hustle/')

data/player/hustle/hustle_.csv Completed Successfully! 1 / 1 Complete!


In [11]:
# move the files to the correct folder
for file in os.listdir('data/player/hustle/'):
    if '.csv' in file:
        if 'Playoffs' in file:
            os.rename('data/player/hustle/' + file, 'data/player/hustle/playoffs/' + file)
        else:
            os.rename('data/player/hustle/' + file, 'data/player/hustle/regular_season/' + file)

In [12]:
hustle_data = append_the_data('data/player/hustle/regular_season/', 'hust_', 'hustle')
hustle_data

Unnamed: 0,hust_unnamed: 0,hust_player,hust_team,hust_age,hust_gp,hust_min,hust_screenassists,hust_screenassists pts,hust_deflections,hust_off loose ballsrecovered,hust_def loose ballsrecovered,hust_loose ballsrecovered,hust_% loose ballsrecovered off,hust_% loose ballsrecovered def,hust_chargesdrawn,hust_contested2pt shots,hust_contested3pt shots,hust_contestedshots,hust_season,hust_season_type
0,0,,,,,,,,,,,,,,,,,,,Playoffs
1,1,A.J. Lawson,MIN,22.0,1.0,1.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,,Playoffs
2,2,AJ Green,MIL,23.0,12.0,5.8,0.1,0.3,0.3,0.1,0.1,0.2,50.0,50.0,0.00,0.3,0.2,0.5,,Playoffs
3,3,AJ Griffin,ATL,19.0,29.0,21.5,0.3,0.8,1.3,0.3,0.3,0.6,50.0,50.0,0.00,1.5,2.0,3.5,,Playoffs
4,4,Aaron Gordon,DEN,27.0,29.0,30.0,0.7,1.7,1.4,0.4,0.6,1.0,42.9,57.1,0.00,3.4,1.9,5.3,,Playoffs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,601,Zach LaVine,CHI,27.0,67.0,34.7,0.5,0.9,1.0,0.4,0.2,0.6,61.9,38.1,0.01,2.5,3.4,5.9,2021,Regular
602,602,Zavier Simpson,OKC,25.0,4.0,43.5,0.0,0.0,2.3,0.3,0.8,1.0,25.0,75.0,0.00,3.8,6.0,9.8,2021,Regular
603,603,Zeke Nnaji,DEN,21.0,41.0,17.0,0.9,2.4,0.8,0.2,0.1,0.4,62.5,37.5,0.00,2.0,1.7,3.7,2021,Regular
604,604,Ziaire Williams,MEM,20.0,62.0,21.7,0.1,0.2,1.0,0.1,0.2,0.3,42.1,57.9,0.00,2.1,2.5,4.6,2021,Regular


In [13]:
hustle_data.to_csv('data/player/aggregates/All_Hustle.csv')