In [70]:
# Scraping data with Selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from datetime import datetime
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
import time
from selenium.webdriver.support import expected_conditions as EC

In [68]:
def new_driver(headless=False):
    """Start a new Chrome WebDriver session.

    Args:
        headless: When True, pass ``--headless`` to Chrome so that no browser
            window is opened. Defaults to False, which keeps the previous
            behaviour of launching a visible browser.

    Returns:
        The new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when finished.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    return webdriver.Chrome(options=chrome_options)

In [69]:
def create_soup_object(driver):
    """Parse the page currently loaded in ``driver`` into a BeautifulSoup tree.

    Args:
        driver: Object exposing a ``page_source`` attribute with the HTML of
            the current page (the Selenium driver in this notebook).

    Returns:
        A ``BeautifulSoup`` object built with the 'html.parser' backend.
    """
    return BeautifulSoup(driver.page_source, 'html.parser')

In [65]:
def _find_shot_row(table, shot_label):
    """Return the first <tr> of ``table`` whose first <td> contains ``shot_label``, else None."""
    for row in table.find_all('tr'):
        first_cell = row.find('td')
        # Rows without any <td> (e.g. header rows) are skipped.
        if first_cell is not None and shot_label in first_cell.get_text():
            return row
    return None


def _cell_text(row, position):
    """Return the text of the ``position``-th (1-based) <td> in ``row``.

    When the cell wraps its value in an <a> tag the link text is used;
    with 0 attempts there is no <a> tag, so the cell's own text is used.
    """
    link = row.select_one(f'td:nth-of-type({position}) a')
    if link is not None:
        return link.text
    return row.select_one(f'td:nth-of-type({position})').text


def find_shot_stats(soup):
    """Extract step-back and pull-up jump shot makes/attempts from a parsed page.

    Args:
        soup: BeautifulSoup tree of a loaded player shooting page.

    Returns:
        dict with the keys 'Stepback FGM', 'Stepback FGA', 'Pullup FGM' and
        'Pullup FGA'. Values are the raw cell strings, not numbers.

    Raises:
        ValueError: if the page has fewer than seven stats tables, or the
            expected shot-type row is not present in the seventh one.
    """
    # The stats live in the seventh table carrying this class.
    # NOTE(review): presumably that is the shot-type breakdown — confirm if
    # the page layout changes.
    shot_table_index = 6
    fgm_column, fga_column = 5, 6

    tables = soup.find_all('table', class_='Crom_table__p1iZz')
    if len(tables) <= shot_table_index:
        raise ValueError(
            f'expected at least {shot_table_index + 1} stats tables, found {len(tables)}'
        )
    shot_table = tables[shot_table_index]

    stats = {}
    for prefix, shot_label in (('Stepback', 'Step Back Jump shot'),
                               ('Pullup', 'Pullup Jump shot')):
        row = _find_shot_row(shot_table, shot_label)
        if row is None:
            raise ValueError(f'no "{shot_label}" row found in the shot table')
        stats[f'{prefix} FGM'] = _cell_text(row, fgm_column)
        stats[f'{prefix} FGA'] = _cell_text(row, fga_column)
    return stats
    

In [66]:
def scrape_bron(start_year=2008, end_year=2023):
    """Scrape per-season step-back and pull-up shot stats for player 2544.

    One shooting page is loaded per season; the stats returned by
    ``find_shot_stats`` are collected together with the season label.

    Args:
        start_year: First season start year to scrape (inclusive).
        end_year: Season start year to stop at (exclusive). The defaults
            cover seasons 2008-09 through 2022-23.

    Returns:
        pandas.DataFrame with one row per season and the columns 'Year',
        'Stepback FGM', 'Stepback FGA', 'Pullup FGM', 'Pullup FGA'.
    """
    url_template = 'https://www.nba.com/stats/player/2544/shooting?Season={season}'
    driver = new_driver()
    # Compiling list of statistics dictionaries.
    final_data = []
    try:
        for year in range(start_year, end_year):
            # Season label used in the URL, e.g. 2008 -> '2008-09'.
            year_abbrev = f'{year}-{str(year + 1)[-2:]}'
            url = url_template.format(season=year_abbrev)
            try:
                driver.get(url)
            except Exception:
                # Retry once with a fresh browser session.
                print("Error occurred. Creating a new driver instance...")
                driver.quit()
                driver = new_driver()
                driver.get(url)
            # Waiting for dynamic content to load.
            # NOTE(review): this waits for the first matching table only, while
            # find_shot_stats reads the seventh one — confirm they render together.
            wait = WebDriverWait(driver, 20)
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'Crom_table__p1iZz')))
            soup = create_soup_object(driver)
            # Creating a dictionary with the relevant jump shot stats.
            year_stats = {'Year': year_abbrev}
            year_stats.update(find_shot_stats(soup))
            final_data.append(year_stats)
    finally:
        # Always release the browser, even when a page fails to load or parse.
        driver.quit()
    return pd.DataFrame(final_data)

In [128]:
# Run the scrape: opens a Chrome session and loads one nba.com shooting page per season.
scraped_data = scrape_bron()

In [129]:
# Display the scraped table.
scraped_data

Unnamed: 0,Year,Stepback FGM,Stepback FGA,Pullup FGM,Pullup FGA
0,2008-09,4,4,4,7
1,2009-10,3,9,2,3
2,2010-11,1,1,5,6
3,2011-12,0,0,1,1
4,2012-13,2,5,4,5
5,2013-14,3,6,4,9
6,2014-15,3,9,0,2
7,2015-16,2,10,8,15
8,2016-17,12,24,12,22
9,2017-18,20,49,13,30


In [130]:
def shorten_abbrev(year):
    """Drop the first two characters of a season label ('2008-09' -> '08-09')."""
    prefix_width = 2
    return year[prefix_width:]

In [131]:
def clean_bron(bron_df):
    """Return a cleaned copy of the scraped shot table with derived columns.

    The input frame is left untouched, so calling this repeatedly on the same
    scraped data always gives the same result (the season label is not
    shortened a second time).

    Args:
        bron_df: DataFrame with a 'Year' column of season labels such as
            '2008-09' plus 'Stepback FGM', 'Stepback FGA', 'Pullup FGM' and
            'Pullup FGA' columns holding values convertible to float.

    Returns:
        A new DataFrame in which every column except 'Year' is float,
        'FGM'/'FGA' hold the pull-up + step-back totals, 'FG%',
        'Pullup FG%' and 'Stepback FG%' hold percentages rounded to two
        decimals, 'Year' is shortened (e.g. '08-09'), and missing values
        (such as 0 makes / 0 attempts) are replaced by 0.
    """
    cleaned = bron_df.copy()

    # Cast every stat column; select by name rather than position so a
    # leading non-'Year' column does not break the conversion.
    stat_columns = [col for col in cleaned.columns if col != 'Year']
    cleaned = cleaned.astype({col: float for col in stat_columns})

    cleaned['FGM'] = cleaned['Pullup FGM'] + cleaned['Stepback FGM']
    cleaned['FGA'] = cleaned['Pullup FGA'] + cleaned['Stepback FGA']

    # Scale before rounding so the stored value is e.g. 72.73 without
    # floating-point noise from multiplying an already-rounded ratio.
    for prefix in ('', 'Pullup ', 'Stepback '):
        ratio = cleaned[f'{prefix}FGM'] / cleaned[f'{prefix}FGA']
        cleaned[f'{prefix}FG%'] = (ratio * 100).round(2)

    cleaned['Year'] = cleaned['Year'].apply(shorten_abbrev)
    # 0 / 0 yields NaN; report those seasons as 0.
    return cleaned.fillna(0)

In [132]:
# Cast the scraped stat columns to float and add the combined and percentage columns.
bron_df = clean_bron(scraped_data)

In [134]:
# Persist the cleaned table; index=False keeps the row index out of the file
# so it does not come back as a spurious 'Unnamed: 0' column when reloaded.
bron_df.to_csv('bron_df.csv', index=False)

In [125]:
# Reload the cleaned table from 'bron_df.csv' so the analysis below can run without re-scraping.
bron_df = pd.read_csv('bron_df.csv')

In [135]:
# Display the cleaned table.
bron_df

Unnamed: 0,Year,Stepback FGM,Stepback FGA,Pullup FGM,Pullup FGA,FGM,FGA,FG%,Pullup FG%,Stepback FG%
0,08-09,4.0,4.0,4.0,7.0,8.0,11.0,72.73,57.14,100.0
1,09-10,3.0,9.0,2.0,3.0,5.0,12.0,41.67,66.67,33.33
2,10-11,1.0,1.0,5.0,6.0,6.0,7.0,85.71,83.33,100.0
3,11-12,0.0,0.0,1.0,1.0,1.0,1.0,100.0,100.0,0.0
4,12-13,2.0,5.0,4.0,5.0,6.0,10.0,60.0,80.0,40.0
5,13-14,3.0,6.0,4.0,9.0,7.0,15.0,46.67,44.44,50.0
6,14-15,3.0,9.0,0.0,2.0,3.0,11.0,27.27,0.0,33.33
7,15-16,2.0,10.0,8.0,15.0,10.0,25.0,40.0,53.33,20.0
8,16-17,12.0,24.0,12.0,22.0,24.0,46.0,52.17,54.55,50.0
9,17-18,20.0,49.0,13.0,30.0,33.0,79.0,41.77,43.33,40.82


In [154]:
import plotly.graph_objects as go
import pandas as pd

def plot_efficiency(shot_type, start_year, end_year, include_text, bron_df):
    """Plot makes, attempts and percentage per season for one shot type.

    Makes and attempts are drawn as overlaid bars; the percentage is drawn
    as a line on a secondary y-axis. The figure is shown, not returned.

    Args:
        shot_type: Column prefix selecting the shot type. Use '' for the
            combined columns, or a prefix with a trailing space such as
            'Stepback ' or 'Pullup '.
        start_year: First season label to include (inclusive), e.g. '08-09'.
        end_year: Last season label to include (inclusive), e.g. '22-23'.
            Both bounds are compared against the 'Year' column as given.
        include_text: When True, print the bar values on the bars.
        bron_df: DataFrame holding 'Year' plus the '<shot_type>FGM',
            '<shot_type>FGA' and '<shot_type>FG%' columns.
    """
    # Label used in the legend, axis and title; the combined view has its own.
    if shot_type == '':
        label = 'LeFuck You '
    else:
        label = shot_type

    in_range = (bron_df.Year >= start_year) & (bron_df.Year <= end_year)
    seasons = bron_df.loc[in_range]

    made_col = shot_type + 'FGM'
    attempted_col = shot_type + 'FGA'
    pct_col = shot_type + 'FG%'

    # Bar trace for makes.
    fgm_trace = go.Bar(
        x=seasons['Year'],
        y=seasons[made_col],
        name=f'{label}3PM',
        text=seasons[made_col] if include_text else None,
        textposition='auto',
    )

    # Bar trace for attempts. The bar height must come from the same
    # shot-type column as its text label, not from the combined 'FGA' column.
    fga_trace = go.Bar(
        x=seasons['Year'],
        y=seasons[attempted_col],
        name=f'{label}3PA',
        text=seasons[attempted_col] if include_text else None,
        textposition='auto',
    )

    # Line trace for the percentage, on the secondary axis.
    percentage_trace = go.Scatter(
        x=seasons['Year'],
        y=seasons[pct_col],
        name='3pt Percentage',
        yaxis='y2',
        mode='lines',
    )

    layout = go.Layout(
        title=dict(text=f'{label}Three Point Efficiency', x=.5),
        # Treat seasons as categories and keep them in data order.
        xaxis=dict(
            title='Year',
            type='category',
            categoryorder='array',
            categoryarray=list(seasons['Year']),
        ),
        yaxis=dict(title=f'{label}3s'),
        yaxis2=dict(title='3pt Percentage', overlaying='y', side='right', showgrid=False),
        font=dict(family='Roboto Condensed'),
        legend=dict(y=1.2, x=.85),
        # Attempts are drawn first and makes on top of them.
        barmode='overlay',
        width=1000,
        height=600,
    )

    fig = go.Figure(data=[fga_trace, fgm_trace, percentage_trace], layout=layout)

    # Displaying the plot.
    fig.show()


In [155]:
# Combined (step-back + pull-up) columns for seasons 08-09 through 22-23, without bar labels.
plot_efficiency('', '08-09', '22-23', False, bron_df)

In [156]:
# Combined columns restricted to 15-16 through 22-23, with bar labels.
plot_efficiency('', '15-16', '22-23', True, bron_df)

In [157]:
# Step-back columns only, seasons 08-09 through 22-23, without bar labels.
plot_efficiency('Stepback ', '08-09', '22-23', False, bron_df)

In [158]:
# Pull-up columns only, seasons 15-16 through 22-23, with bar labels.
plot_efficiency('Pullup ', '15-16', '22-23', True, bron_df)

In [159]:
# Step-back columns only, seasons 15-16 through 22-23, with bar labels.
plot_efficiency('Stepback ', '15-16', '22-23', True, bron_df)

In [166]:
# Overall makes / attempts across all rows, scaled by 1.5.
# NOTE(review): the 1.5 factor presumably treats every make as a three-pointer
# (effective FG%) — confirm.
bron_df['FGM'].sum() / bron_df['FGA'].sum() * 1.5

0.5786802030456852

In [163]:
# Total combined makes across all rows of bron_df.
bron_df['FGM'].sum()

304.0

In [164]:
# Total combined attempts across all rows of bron_df.
bron_df['FGA'].sum()

788.0

In [169]:
# Keep the seasons after 14-15. 'Year' holds 'YY-YY' strings, so this is a
# lexicographic comparison.
volume = bron_df.loc[bron_df.Year > '14-15']

In [171]:
# Pull-up makes / attempts over the seasons selected in `volume`.
volume['Pullup FGM'].sum() / volume['Pullup FGA'].sum()

0.3793103448275862

In [161]:
# Display the cleaned table again for reference.
bron_df

Unnamed: 0,Year,Stepback FGM,Stepback FGA,Pullup FGM,Pullup FGA,FGM,FGA,FG%,Pullup FG%,Stepback FG%
0,08-09,4.0,4.0,4.0,7.0,8.0,11.0,72.73,57.14,100.0
1,09-10,3.0,9.0,2.0,3.0,5.0,12.0,41.67,66.67,33.33
2,10-11,1.0,1.0,5.0,6.0,6.0,7.0,85.71,83.33,100.0
3,11-12,0.0,0.0,1.0,1.0,1.0,1.0,100.0,100.0,0.0
4,12-13,2.0,5.0,4.0,5.0,6.0,10.0,60.0,80.0,40.0
5,13-14,3.0,6.0,4.0,9.0,7.0,15.0,46.67,44.44,50.0
6,14-15,3.0,9.0,0.0,2.0,3.0,11.0,27.27,0.0,33.33
7,15-16,2.0,10.0,8.0,15.0,10.0,25.0,40.0,53.33,20.0
8,16-17,12.0,24.0,12.0,22.0,24.0,46.0,52.17,54.55,50.0
9,17-18,20.0,49.0,13.0,30.0,33.0,79.0,41.77,43.33,40.82


In [172]:
# NOTE(review): ad-hoc ratio; where 16 and 84 come from is not shown in this
# notebook — confirm, or replace with a computed expression.
16 /84

0.19047619047619047