### Getting url page through selenium webdriver

I originally tried this using only the BeautifulSoup package. However, the data within the HTML is loaded/created through JavaScript. According to an answer on Stack Overflow, Selenium is needed to access a page like that.

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup

In [2]:
# Open the NBA player stats page in a Selenium-driven Chrome session.
url = 'https://stats.nba.com/players/traditional/'

driver = webdriver.Chrome()
# The stats table is filled in by JavaScript after the initial page load, so
# let element lookups poll for up to 10 seconds instead of failing right away.
driver.implicitly_wait(10)
driver.get(url)

# HTML source of the page as currently loaded in the browser.
page = driver.page_source

### Getting web page elements

In [3]:
# Locate the page elements needed for scraping, using absolute XPaths.
# Elements are looked up with find_element("xpath", ...) because the
# find_element_by_xpath helper is deprecated and missing from newer Selenium
# releases; the generic form works on both old and new versions.

# XPath of the data table header
header_xpath = '/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]/div[1]/table/thead'
header = driver.find_element("xpath", header_xpath)

# XPath of the table body holding the actual stats (looked up later, per page)
results_xpath = '/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[2]/div[1]/table/tbody'

# XPath of the page drop-down menu; its text is used to work out the page count
total_pages_xpath = '/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[3]/div/div/select'
total_pages = driver.find_element("xpath", total_pages_xpath).text

# XPath of the next-page button
next_page_xpath = '/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[3]/div/div/a[2]'
# Keep a handle on the next-page button so .click() can be called on it later
next_page_button = driver.find_element("xpath", next_page_xpath)

### Creating loop to go through each page of stats

In [5]:
# Walk through every page of the stats table, collecting the player names and
# the matching stat rows into two parallel lists.

# The last whitespace-separated token of the dropdown text is taken as the
# number of the last page. Parsing the whole token (not just the final
# character) keeps page counts of 10 or more intact: "10"[-1] would give 0
# and the loop would never run.
# NOTE(review): assumes the dropdown text ends with the highest page number.
loop_stop = int(total_pages.split()[-1])
loop_count = 1
raw_stats = ""
player_list = []
stat_list = []

while loop_count <= loop_stop:
    # Re-locate the table body on every pass so the current page is read.
    results = driver.find_element("xpath", results_xpath)
    loop_stats = results.text
    page_lines = loop_stats.split("\n")  # split once, reuse for both slices

    # The table text is consumed in groups of three lines: the second line of
    # each group is stored as the player name, the third as the stats row.
    player_list += page_lines[1::3]
    stat_list += page_lines[2::3]

    # Only advance when another page remains; there is nothing to click
    # through to after the final page has been read.
    # NOTE(review): nothing here waits for the table to refresh after the
    # click — confirm the next read cannot pick up the previous page's rows.
    if loop_count < loop_stop:
        next_page_button.click()
    loop_count += 1

In [6]:
# Break the header text into whitespace-separated tokens: the first token is
# kept as the label of the player column, the remaining tokens are the labels
# of the stat columns.
header_tokens = header.text.split()

header_player = header_tokens[0]
header_stats = header_tokens[1:]

In [7]:
# Scraping is finished: shut down the webdriver session
driver.quit()

### Converting web data into dataframe

In [8]:
import pandas as pd
import numpy as np

In [9]:
# Every entry of stat_list is one full row of data held in a single string,
# with the individual values separated by whitespace.
# Break each row into its separate values, then map each player name to that
# player's list of values.
value_list = [row.split() for row in stat_list]

dictionary = {player: values for player, values in zip(player_list, value_list)}

In [10]:
# Turn the dictionary into a dataframe: each key becomes a row label
# (orient="index") and the header stat names become the column labels.

df = pd.DataFrame.from_dict(
    data=dictionary,
    orient="index",
    columns=header_stats,
)

In [11]:
# Display the first rows of df to check that it is formatted correctly
df.head()

Unnamed: 0,TEAM,AGE,GP,W,L,MIN,PTS,FGM,FGA,FG%,...,REB,AST,TOV,STL,BLK,PF,FP,DD2,TD3,+/-
James Harden,HOU,30,8,5,3,35.3,36.5,9.3,23.9,38.7,...,5.0,8.1,5.8,1.3,0.8,3.3,54.9,3,0,0.9
Kyrie Irving,BKN,27,7,3,4,33.8,31.7,10.7,22.7,47.2,...,6.1,7.7,2.9,1.3,0.6,2.7,53.4,2,1,4.4
Damian Lillard,POR,29,7,3,4,37.7,31.1,10.6,21.4,49.3,...,5.0,7.3,2.6,1.3,0.6,2.3,51.1,1,0,4.6
Kawhi Leonard,LAC,28,6,5,1,30.5,29.3,10.7,22.5,47.4,...,7.3,5.7,3.0,2.3,1.0,2.7,53.6,2,0,11.7
Giannis Antetokounmpo,MIL,24,8,6,2,32.2,29.0,10.5,17.8,59.2,...,14.3,7.6,4.0,1.3,1.6,4.1,62.2,8,1,10.4


In [12]:
# Write the scraped table, including its header row, to the Data folder one
# level above the working directory.
# The path is built with pathlib: the previous raw string kept both
# backslashes of every "\\", giving a path with doubled separators that only
# resolved on Windows.
from pathlib import Path

df.to_csv(Path("..") / "Data" / "Scraped_Stats.csv", header=True)