#### Imports

In [1]:
import os
import time
import selenium
import pandas as pd

from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

#### Scraping Functions

In [12]:
def _open_chromedriver(download_directory=r'C:\Users\lukar\Desktop\Sports Analytics\NBA Ball Handle Rate\data\seconds-per-possession'):
    '''
    returns browser object of chromedriver with a specified download directory
    '''
    # create chromeOptions object
    chrome_options = Options()
    
    prefs = {"download.default_directory" : download_directory}
    chrome_options.add_experimental_option("prefs",prefs)
    
    browser = webdriver.Chrome(options=chrome_options)
    return browser

def _scrape_table_data(browser, season, team_id, folderpath, rs=True):
    '''
    returns browser object & scrapes a specific table on the page and saves it to a local directory
    '''
    data = []
    attempts = 0
    
    while True:
        try:
            
            # find table parent element
            table_parent = browser.find_element(By.ID, 'vgt-table')

            # find tbody element within parent
            tbody = table_parent.find_element(By.TAG_NAME, 'tbody')

            # iterate over all the table rows & extract cell data
            rows = tbody.find_elements(By.TAG_NAME, 'tr')

            columns = ['PLAYER_NAME','MINUTES_ON','MINUTES_OFF',
                       'SECONDS_PER_POSS_OFFENSE_PLAYER_ON','SECONDS_PER_POSS_OFFENSE_PLAYER_OFF','SECONDS_PER_POSS_OFFENSE_PLAYER_ON_OFF',
                       'SEASON','TEAM_ID']
            data = []

            # iterate over all the table rows and extract cell data text
            for row in rows:
                # create empty lst to store player data
                player_data = []

                # get all the cells in the row
                cells = row.find_elements(By.TAG_NAME, 'td')

                # iterate over the cells and extract the cell text
                for cell in cells:
                    player_data.append(cell.text)

                player_data.append(season)
                player_data.append(team_id)

                # append player data to list
                data.append(player_data)
            
            break
                
        except:
            if (attempts == 10):
                print("There was an error scraping data for the following team: {} | {}".format(team_id, season))
                break
                
            # find the button and click it           
            button = browser.find_element(By.XPATH, '/html/body/div/div/main/div[2]/button')
            button.click()
            
            time.sleep(2)
                
            attempts += 1
            
    # create dataframe
    if rs:
        team_seconds_df = pd.DataFrame(data, columns=columns)
        team_seconds_df.to_csv(folderpath + "{}_rs_seconds_per_poss_offense_{}.csv".format(team_id, season), index=False)
    else:
        if data != []:
            team_seconds_df = pd.DataFrame(data, columns=columns)
            team_seconds_df.to_csv(folderpath + "{}_playoffs_seconds_per_poss_offense_{}.csv".format(team_id, season), index=False)
        else:
            pass
    
    return browser

def _scrape_rs_seconds_per_offense(browser, start_year=2013, end_year=2023, teams_lst=teams.get_teams(), folderpath="data/seconds-per-possession/regular-season/"):
    '''
    returns nothing, but downloads a .csv file of seconds per possession - offense to a local directory given parameters to a specific URL
    '''
    print('Starting to Scrape Regular Season: Seconds per Possession - Offense Data...')
    
    # iterate through the seasons
    for season in tqdm(range(start_year, end_year)):
        season_param = str(season) + '-' + str(season+1)[2:]
        
        # iterate through all the teams
        for team in teams_lst:
            team_id = team['id']
            
            # input URL into browser and let it load
            url = "https://www.pbpstats.com/on-off/nba/stat?Season={}&SeasonType=Regular%2BSeason&TeamId={}&Stat=SecondsPerPossOff".format(season_param, team_id)
            browser.get(url)
            
            time.sleep(8)
            
            browser = _scrape_table_data(browser, season_param, team_id, folderpath)
    
    print('Finished Scraping Regular Season: Seconds per Possession - Offense Data!')
    
    return 

def _scrape_playoffs_seconds_per_offense(browser, start_year=2013, end_year=2023, teams_lst=teams.get_teams(), folderpath="data/seconds-per-possession/playoffs/"):
    '''
    returns nothing, but downloads a .csv file of seconds per possession - offense to a local directory given parameters to a specific URL
    '''
    print('Starting to Scrape Playoffs: Seconds per Possession - Offense Data...')
    
    # iterate through the seasons
    for season in tqdm(range(start_year, end_year)):
        season_param = str(season) + '-' + str(season+1)[2:]
        
        # iterate through all the teams
        for team in teams_lst:
            team_id = team['id']
            
            # input URL into browser and let it load
            url = "https://www.pbpstats.com/on-off/nba/stat?Season={}&SeasonType=Playoffs&TeamId={}&Stat=SecondsPerPossOff".format(season_param, team_id)
            browser.get(url)
            
            time.sleep(8)
            
            browser = _scrape_table_data(browser, season_param, team_id, folderpath, rs=False)

    print('Finished Scraping Playoffs: Seconds per Possession - Offense Data!')
            
    return 

#### Scrape the Data

In [13]:
# regular season: open Chrome, scrape every season/team, then always close the browser
browser = _open_chromedriver()
try:
    _scrape_rs_seconds_per_offense(browser)
finally:
    # quit() ends the chromedriver session so the Chrome process is not left running,
    # even when the scrape raises or is interrupted
    browser.quit()

Starting to Scrape Regular Season: Seconds per Possession - Offense Data...


100%|██████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s]

Finished Scraping Regular Season: Seconds per Possession - Offense Data!





In [14]:
# playoffs: open Chrome, scrape every season/team, then always close the browser
browser = _open_chromedriver()
try:
    _scrape_playoffs_seconds_per_offense(browser)
finally:
    # quit() ends the chromedriver session so the Chrome process is not left running,
    # even when the scrape raises or is interrupted
    browser.quit()

Starting to Scrape Playoffs: Seconds per Possession - Offense Data...


100%|██████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s]

Finished Scraping Playoffs: Seconds per Possession - Offense Data!



