## Selenium

The code below shows how to scrape table elements with Selenium. You can adapt it to scrape other elements and other websites (URLs) too.

In [4]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup

import os

# Path to your chromedriver executable
# Chrome version: Version 113.0.5672.126 (Official Build) (x86_64)
# A leading "~" is only expanded by a shell, not by Python or the OS, so it is
# resolved to the home directory explicitly before being handed to Service().
webdriver_path = os.path.expanduser('~/chromedriver')

# Set up the Selenium driver with options
options = Options()
options.add_argument('--headless')  # Run in headless mode
driver = webdriver.Chrome(service=Service(webdriver_path), options=options)

# Specify the URL of the webpage
url = "https://www.baseball-reference.com/teams/NYY/2023.shtml"

try:
    # Load the webpage
    driver.get(url)

    # Wait for the dynamic content to load (if necessary)
    # You can use driver.implicitly_wait() or other wait methods

    # Extract the page source after the dynamic content has loaded
    source = driver.page_source
finally:
    # Always close the Selenium driver, even when loading the page fails,
    # so that no headless browser process is left running.
    driver.quit()

# Parse the page source with BeautifulSoup
soup = BeautifulSoup(source, 'lxml')

# Find all the table elements (ResultSet object is returned. This looks like a list.)
table_elements = soup.find_all('table')

- Print the number of table elements that were found.
- Each table element is a `Tag` object stored in a `ResultSet` object (which behaves like a list).

In [3]:
# Report how many <table> elements were found and the type of their container.
print(len(table_elements))
print(type(table_elements))

# Indexing [0] raises IndexError when the page contained no <table> at all,
# so only inspect the first element when there is one.
if table_elements:
    print(type(table_elements[0]))
else:
    print("No <table> elements were found in the page source.")

14
<class 'bs4.element.ResultSet'>
<class 'bs4.element.Tag'>


pandas can't read these objects until they are cast to strings. Once each tag is a string, pandas can parse it and store each table in its own DataFrame.

In [5]:
# Extract the HTML content from each table element
from io import StringIO

# Extract the HTML content from each table element
html_tables = [str(table) for table in table_elements]

# Pass the joined HTML to pd.read_html(), which returns one DataFrame per table.
# The markup is wrapped in StringIO because passing a literal HTML string is
# deprecated in recent pandas releases; a file-like object works in old and new versions.
dfs = pd.read_html(StringIO('\n'.join(html_tables)))

In [6]:
# Render the first five rows of every DataFrame parsed from the page.
for df in dfs:
    display(df.head(n=5))

Unnamed: 0,0,1,2
0,"May 25, 2023","May 25, 2023","May 25, 2023"
1,Baltimore Orioles,,Preview
2,New York Yankees,,7:05PM


Unnamed: 0,0,1
0,BAL,Kyle Gibson
1,NYY,Clarke Schmidt


Unnamed: 0,0,1,2
0,"May 26, 2023","May 26, 2023","May 26, 2023"
1,San Diego Padres,,Preview
2,New York Yankees,,7:05PM


Unnamed: 0,0,1,2
0,"May 27, 2023","May 27, 2023","May 27, 2023"
1,San Diego Padres,,Preview
2,New York Yankees,,1:05PM


Unnamed: 0,0,1,2
0,"May 28, 2023","May 28, 2023","May 28, 2023"
1,San Diego Padres,,Preview
2,New York Yankees,,1:35PM


Unnamed: 0,0,1,2
0,"May 29, 2023","May 29, 2023","May 29, 2023"
1,New York Yankees,,Preview
2,Seattle Mariners,,9:40PM


Unnamed: 0,Rk,Pos,Name,Age,G,PA,AB,R,H,2B,...,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB
0,1,C,Jose Trevino (10-day IL),30,30,102,96,11,21,2,...,0.265,0.333,0.598,64,32,4,0,0,0,0
1,2,1B,Anthony Rizzo*,33,49,214,189,30,58,8,...,0.383,0.524,0.907,148,99,4,6,0,1,0
2,3,2B,Gleyber Torres,26,50,211,182,32,48,8,...,0.351,0.467,0.818,124,85,5,1,0,3,0
3,4,SS,Anthony Volpe,22,51,200,177,21,36,5,...,0.29,0.373,0.663,82,66,1,1,0,1,0
4,5,3B,DJ LeMahieu,34,45,182,164,20,41,8,...,0.319,0.402,0.721,98,66,3,2,0,1,0


Unnamed: 0,Rk,Pos,Name,Age,W,L,W-L%,ERA,G,GS,...,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
0,1,SP,Gerrit Cole,32,5,0,1.0,2.53,11,11,...,0,277,169,3.47,1.138,7.2,0.8,3.1,9.3,3.04
1,2,SP,Nestor Cortes*,28,4,2,0.667,5.3,10,10,...,0,228,81,4.77,1.27,8.8,1.7,2.7,8.8,3.31
2,3,SP,Domingo Germán (40-man),30,2,3,0.4,3.75,9,9,...,0,188,114,4.19,0.896,5.3,1.3,2.8,9.2,3.27
3,4,SP,Clarke Schmidt,27,2,4,0.333,6.0,10,10,...,2,207,71,4.39,1.622,11.4,1.6,3.2,10.8,3.38
4,5,SP,Jhony Brito (40-man),25,3,3,0.5,5.58,10,9,...,0,178,77,5.53,1.488,9.4,1.6,4.0,6.7,1.67


Unnamed: 0,Rk,Uni,Name,Unnamed: 3,Unnamed: 4,OnActv,IL,Age,B,T,Ht,Wt,DoB,1stYr
0,1,84,Albert Abreu,do DO,Pitcher,*,,27,R,R,"6' 2""",190,"Sep 26, 1995",2014
1,2,45,Gerrit Cole,us US,Pitcher,*,,32,R,R,"6' 4""",220,"Sep 8, 1990",2011
2,3,62,Ryan Weber,us US,Pitcher,*,,32,R,R,"6' 1""",175,"Aug 12, 1990",2009
3,4,58,Wandy Peralta,do DO,Pitcher,*,,31,L,L,"6' 0""",227,"Jul 27, 1991",2010
4,5,70,Jimmy Cordero,do DO,Pitcher,*,,31,R,R,"6' 4""",245,"Oct 19, 1991",2012


Unnamed: 0,Name,Age,Unnamed: 2,B,T,Ht,Wt,DoB,Yrs,G,...,LF,CF,RF,OF,DH,PH,PR,WAR,Salary,Unnamed: 28
0,Albert Abreu,27,do DO,R,R,"6' 2""",190,"Sep 26, 1995",4,19,...,0,0,0,0,0,0,0,0.1,"$738,700",
1,Greg Allen,30,us US,B,R,"6' 0""",185,"Mar 15, 1993",7,3,...,1,0,2,3,0,0,3,0.2,,
2,Harrison Bader,29,us US,R,R,"6' 0""",210,"Jun 3, 1994",7,21,...,0,21,0,21,0,2,0,1.0,,
3,Jake Bauers,27,us US,L,L,"5' 11""",195,"Oct 6, 1995",4,19,...,10,0,6,16,2,3,0,-0.1,,
4,Colten Brewer,30,us US,R,R,"6' 4""",222,"Oct 29, 1992",5,3,...,0,0,0,0,0,0,0,0.0,,


Unnamed: 0,Name,Age,Unnamed: 2,DoB,Role,Start Date,End Date
0,Aaron Boone,50,us US,"Mar 9, 1973",Manager,Mar 30,May 24
1,Name,Age,,DoB,Role,Start Date,End Date


Unnamed: 0,Name,Age,G,GS,CG,Inn,Ch,PO,A,E,...,RF/9,RF/G,PB,WP,SB,CS,CS%,lgCS%,PO.1,Pos Summary
0,Albert Abreu,27,19,0,0,24.2,3,2,1,0,...,1.09,0.16,,,3.0,0.0,0%,21%,0.0,P
1,Greg Allen,30,3,0,0,6.0,0,0,0,0,...,0.0,0.0,,,,,,,,RF-LF
2,Harrison Bader,29,21,18,16,166.1,56,55,0,1,...,2.98,2.62,,,,,,,,CF
3,Jake Bauers,27,16,13,3,97.1,21,19,1,1,...,1.85,1.11,,,,,,,,LF-RF-DH
4,Colten Brewer,30,3,0,0,8.1,3,2,1,0,...,3.24,1.0,,,0.0,0.0,,,0.0,P


Unnamed: 0,Name,Age,G,PA,Rbat,Rbaser,Rdp,Rfield,Rpos,RAA,...,RAR,WAR,waaWL%,162WL%,oWAR,dWAR,oRAR,Salary,Acquired,Pos Summary
0,Greg Allen#,30,3,1,1,1,0,0,0,2,...,2,0.2,0.549,0.501,0.2,0.0,2,,Traded,/H97
1,Harrison Bader,29,21,77,2,0,0,4,1,7,...,9,1.0,0.533,0.504,0.5,0.5,5,,Traded,8/H
2,Jake Bauers*,27,19,52,-1,0,0,-1,-1,-3,...,-1,-0.1,0.484,0.498,0.0,-0.2,0,,Purchased,7/9HD
3,Oswaldo Cabrera#,24,44,157,-10,1,0,1,-1,-10,...,-4,-0.5,0.477,0.494,-0.6,0.0,-5,"$742,100",Free Agency,*79/54H63D
4,Willie Calhoun*,28,27,91,-3,0,0,-1,-2,-6,...,-3,-0.4,0.476,0.496,-0.3,-0.3,-2,,Free Agency,D/9H


Unnamed: 0,Name,Age,IP,G,GS,R,RA9,RA9opp,RA9def,RA9role,...,RAA,WAA,gmLI,WAAadj,WAR,RAR,waaWL%,162WL%,Salary,Acquired
0,Albert Abreu,27,24.2,19,0,14,5.11,4.72,0.06,-0.32,...,-1,-0.1,1.16,-0.1,0.1,2,0.496,0.499,"$738,700",Waivers
1,Colten Brewer,30,8.1,3,0,4,4.32,4.37,0.07,-0.3,...,0,0.0,0.65,0.0,0.0,0,0.488,0.5,,Purchased
2,Jhony Brito (40-man),25,40.1,10,9,28,6.25,4.57,0.07,0.2,...,-7,-0.7,0.9,-0.1,-0.3,-3,0.434,0.496,,Free Agency
3,Gerrit Cole,32,67.2,11,11,20,2.66,4.84,0.06,0.22,...,17,1.9,,-0.1,2.5,23,0.675,0.512,"$36,000,000",Free Agency
4,Jimmy Cordero,31,21.1,20,1,9,3.8,4.65,0.06,-0.32,...,1,0.1,1.12,-0.1,0.3,3,0.506,0.501,"$720,000",Free Agency
