In [1]:
# Import dependencies
import json
import os

from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Configure Splinter to drive a visible (non-headless) Chrome window.
# The value returned by ChromeDriverManager().install() is passed through to
# Browser as the `executable_path` keyword argument.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Set the target URL and load it in the browser
url = 'https://redplanetscience.com/'
browser.visit(url)

# NOTE(review): the article list appears to be filled in by client-side script
# after the initial load - confirm against the live site. Wait (up to 5 s) for
# the first article container so that a later read of browser.html does not
# capture a page with no articles in it. The call evaluates to True/False,
# which the notebook shows as this cell's output.
browser.is_element_present_by_css('div.list_text', wait_time=5)

In [4]:
# Grab the HTML currently held by the browser
html = browser.html

# Parse that HTML with Python's built-in 'html.parser' backend
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Collect every <div> carrying the "list_text" class.
# Each match is the text container of one news article.
articles = soup.find_all("div", attrs={"class": "list_text"})

In [6]:
# Build one {"title", "preview"} record per scraped article
mars_news = []

for row in articles:
    # Locate the title and teaser nodes inside this article container
    title_tag = row.find('div', class_='content_title')
    teaser_tag = row.find('div', class_='article_teaser_body')

    # find() returns None when a node is absent; skip incomplete rows instead
    # of letting `.text` raise AttributeError and abort the whole scrape.
    if title_tag is None or teaser_tag is None:
        continue

    # A fresh dict per article, so no shared dictionary has to be reset
    mars_news.append({
        "title": title_tag.text,
        "preview": teaser_tag.text.rstrip(),  # drop trailing whitespace only
    })

# Print output using json.dumps for clearer viewing
print(json.dumps(mars_news, sort_keys=False, indent=4))

[
    {
        "title": "Mars Scientists Investigate Ancient Life in Australia",
        "preview": "Teams with NASA's Mars 2020 and ESA's ExoMars practiced hunting for fossilized microbial life in the Australian Outback in preparation for their Red Planet missions."
    },
    {
        "title": "Follow NASA's Perseverance Rover in Real Time on Its Way to Mars",
        "preview": "A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing."
    },
    {
        "title": "NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb",
        "preview": "Along with capturing an image before its steepest ascent ever, the robotic explorer filmed its \"selfie stick,\" or robotic arm, in action."
    },
    {
        "title": "Mars Helicopter Attached to NASA's Perseverance Rover",
        "preview": "The team also fueled the rover's sky crane to get ready for this summer's hist

In [7]:
# Close the browser
# The page HTML was already copied into `html` before parsing, so the
# automated browser session can be shut down here.
browser.quit()

In [8]:
# Save results as a JSON file
output_path = 'results/mars_news.json'

# open(..., 'w') does not create missing parent directories, so make sure the
# target folder exists first (exist_ok=True makes this a no-op on re-runs).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w') as fout:
    json.dump(mars_news, fout)