In [1]:
# import dependencies
import pandas as pd
import numpy as np
from splinter import Browser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as soup
# import below when using Chrome browser
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
import time
import json
import os
# Path to the locally installed chromedriver binary.
# Written as a raw string: in a normal literal '\c' is an invalid escape
# sequence, which Python only tolerates with a warning (SyntaxWarning on 3.12+).
# expanduser only rewrites a leading '~', so this path passes through unchanged.
chromedrv = os.path.expanduser(r'C:\chromedriver.exe')

## Method 1: splinter's executable_path

In [3]:
# Chrome launch flags for this session, added in a fixed order
options = webdriver.ChromeOptions()
for chrome_flag in ("--lang=en", "--start-maximized", "--disable-notifications"):
    options.add_argument(chrome_flag)
# Start splinter with a driver binary fetched by webdriver_manager.
# (Note: the executable_path keyword has been deprecated.)
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, options=options, **executable_path)
# Helper: navigate the splinter browser to a page
def geturl(url):
    """Open ``url`` in the module-level splinter ``browser``.

    After navigating, check for an element matching ``div.list_text`` with a
    wait time of one second. The result of that check is discarded, so the
    function always returns ``None``.
    """
    browser.visit(url)
    ready_selector, max_wait_s = 'div.list_text', 1
    browser.is_element_present_by_css(ready_selector, wait_time=max_wait_s)

In [4]:
# Visit the Mars NASA news site with the splinter-based geturl helper
# defined in the previous cell
geturl('https://redplanetscience.com')

In [5]:
# Parse the HTML currently held by the splinter browser
html = browser.html
# `soup` is BeautifulSoup (aliased on import); use the built-in html.parser
news_soup = soup(html, 'html.parser')
# select_one yields only the first element matching the CSS selector
slide_elem = news_soup.select_one('div.list_text')
# Bare expression so the notebook displays the selected element
slide_elem

<div class="list_text">
<div class="list_date">November 30, 2022</div>
<div class="content_title">NASA's New Mars Rover Will Use X-Rays to Hunt Fossils</div>
<div class="article_teaser_body">PIXL, an instrument on the end of the Perseverance rover's arm, will search for chemical fingerprints left by ancient microbes.</div>
</div>

In [6]:
# Use the parent element (the first news entry) to find the news article title:
# the <div> whose class is 'content_title'
title_elem = slide_elem.find('div', class_='content_title')
print(title_elem)

<div class="content_title">NASA's New Mars Rover Will Use X-Rays to Hunt Fossils</div>


In [7]:
# Keep only the text content of the title element (markup removed)
title = title_elem.text
title

"NASA's New Mars Rover Will Use X-Rays to Hunt Fossils"

In [8]:
# Use the parent element to find the paragraph text: take the text of the
# <div> whose class is 'article_teaser_body' inside the same news entry
news_p = slide_elem.find('div', class_='article_teaser_body').text
news_p

"PIXL, an instrument on the end of the Perseverance rover's arm, will search for chemical fingerprints left by ancient microbes."

In [9]:
# Method 1 is finished: quit the splinter browser session
browser.quit()

## Method 2: selenium's webdriver

In [10]:
# Chrome launch flags for this session, added in a fixed order
options = webdriver.ChromeOptions()
for chrome_flag in ("--lang=en", "--start-maximized", "--disable-notifications"):
    options.add_argument(chrome_flag)
# Start Chrome through Selenium 4 directly (no splinter here): the driver
# binary is the local path stored in `chromedrv`, wrapped in a Service object
driver = webdriver.Chrome(options=options, service=Service(chromedrv))
# Visit a site defined in the url
def geturl(url):
    """Open ``url`` in the module-level Selenium ``driver`` and wait for content.

    Navigates to ``url`` and then gives the page up to one second for an
    element matching ``div.list_text`` to appear. The wait is best-effort:
    nothing is raised if the element never shows up, and the function always
    returns ``None``.
    """
    # implicitly_wait does not pause by itself; it only sets how long element
    # lookups may keep retrying (unit: seconds). Set it before navigating.
    driver.implicitly_wait(1)
    driver.get(url)
    # This lookup is what actually waits: it retries until a match exists or
    # the implicit timeout elapses, and returns an empty list instead of raising.
    driver.find_elements(By.CSS_SELECTOR, 'div.list_text')

In [11]:
# Visit the Mars NASA news site with the Selenium-based geturl helper
# defined in the previous cell
geturl('https://redplanetscience.com')

In [12]:
# Parse the HTML (selenium 4): page_source is the driver's current page markup
html = driver.page_source
# `soup` is BeautifulSoup (aliased on import); use the built-in html.parser
news_soup = soup(html, 'html.parser')
# select_one yields only the first element matching the CSS selector
slide_elem = news_soup.select_one('div.list_text')
# Bare expression so the notebook displays the selected element
slide_elem

<div class="list_text">
<div class="list_date">November 30, 2022</div>
<div class="content_title">Air Deliveries Bring NASA's Perseverance Mars Rover Closer to Launch</div>
<div class="article_teaser_body">A NASA Wallops Flight Facility cargo plane transported more than two tons of equipment — including the rover's sample collection tubes — to Florida for this summer's liftoff.</div>
</div>

In [13]:
# Use the parent element (the first news entry) to find the news article title:
# the <div> whose class is 'content_title'
title_elem = slide_elem.find('div', class_='content_title')
print(title_elem)

<div class="content_title">Air Deliveries Bring NASA's Perseverance Mars Rover Closer to Launch</div>


In [14]:
# Keep only the text content of the title element (markup removed)
title = title_elem.text
title

"Air Deliveries Bring NASA's Perseverance Mars Rover Closer to Launch"

In [15]:
# Use the parent element to find the paragraph text: take the text of the
# <div> whose class is 'article_teaser_body' inside the same news entry
news_p = slide_elem.find('div', class_='article_teaser_body').text
news_p

"A NASA Wallops Flight Facility cargo plane transported more than two tons of equipment — including the rover's sample collection tubes — to Florida for this summer's liftoff."

In [16]:
# Method 2 is finished: quit the Selenium driver session
driver.quit()