In [1]:
# Import Splinter and Beautiful Soup.
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Start a visible (non-headless), incognito Chrome session through Splinter,
# using the driver path returned by ChromeDriverManager().install().
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser("chrome", headless=False, incognito=True, **executable_path)

In [3]:
# Open the Mars news site that the rest of the notebook scrapes.
url = "https://redplanetscience.com"
browser.visit(url)

# Check (with wait_time=1) that at least one "div.list_text" element is on the
# page; the boolean result is shown as the cell output.
browser.is_element_present_by_css("div.list_text", wait_time=1)

True

In [4]:
# Take the page source currently held by the browser and build a parse tree
# from it with the "html.parser" backend.
html = browser.html
news_soup = soup(html, features="html.parser")

In [5]:
# Collect every <div class="list_text"> element on the landing page; the cells
# below read one title and one preview out of each of them.
articles = news_soup.find_all("div", attrs={"class": "list_text"})

In [6]:
# Print the headline text found in each article container.
for article in articles:
    title = article.find("div", attrs={"class": "content_title"}).get_text()
    print(title)

Robotic Toolkit Added to NASA's Mars 2020 Rover
NASA's New Mars Rover Is Ready for Space Lasers
NASA's Treasure Map for Water Ice on Mars
NASA's Curiosity Keeps Rolling As Team Operates Rover From Home
NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network
3 Things We've Learned From NASA's Mars InSight 
A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes
Mars Is Getting a New Robotic Meteorologist
Meet the People Behind NASA's Perseverance Rover
10.9 Million Names Now Aboard NASA's Perseverance Mars Rover
NASA Moves Forward With Campaign to Return Mars Samples to Earth
NASA's Perseverance Rover Goes Through Trials by Fire, Ice, Light and Sound
Three New Views of Mars' Moon Phobos
6 Things to Know About NASA's Ingenuity Mars Helicopter
Hear Audio From NASA's Perseverance As It Travels Through Deep Space


In [7]:
# Print the teaser paragraph found in each article container.
for article in articles:
    preview = article.find("div", attrs={"class": "article_teaser_body"}).get_text()
    print(preview)

The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. 
Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.
A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.
The team has learned to meet new challenges as they work remotely on the Mars mission.
Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.
Scientists are finding new mysteries since the geophysics mission landed two years ago.
Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. 
Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.
These are the scientists and engineers who built NAS

In [8]:
# Build one {"title", "preview"} dictionary per article container with a list
# comprehension, pairing each headline with its teaser text.
article_list = [
    {
        "title": article.find("div", attrs={"class": "content_title"}).get_text(),
        "preview": article.find("div", attrs={"class": "article_teaser_body"}).get_text(),
    }
    for article in articles
]

In [9]:
# Print the results.
# Each element of article_list is a dict with "title" and "preview" keys,
# so this emits one dictionary per output line.
for article in article_list:
    print(article)

{'title': "Robotic Toolkit Added to NASA's Mars 2020 Rover", 'preview': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "}
{'title': "NASA's New Mars Rover Is Ready for Space Lasers", 'preview': 'Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.'}
{'title': "NASA's Treasure Map for Water Ice on Mars", 'preview': 'A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.'}
{'title': "NASA's Curiosity Keeps Rolling As Team Operates Rover From Home", 'preview': 'The team has learned to meet new challenges as they work remotely on the Mars mission.'}
{'title': 'NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network', 'preview': 'Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.'}
{'title': "3 Things We've Lear

In [10]:
# Close the Splinter session.
# The cells shown below work only with article_list and MongoDB, so the
# browser is not used again after this point.
browser.quit()

In [11]:
# Export the list of dictionaries into a JSON file.
import json

# Serialize once so the exact text written to disk stays available as jsonString.
jsonString = json.dumps(article_list)

# The context manager closes the file even if the write raises, which the
# manual open()/close() pair did not guarantee.
with open("article_list.json", "w") as jsonFile:
    jsonFile.write(jsonString)

In [12]:
# Connect to MongoDB on port 27017. No host is passed, so MongoClient's own
# default host is used.
from pymongo import MongoClient

mongo = MongoClient(port=27017)

In [13]:
# Set up a database named "mars_news".
db = mongo["mars_news"]

# Set up a collection named "article_list".
collect = db["article_list"]

# Insert the list of dictionaries.
# insert_many() adds an "_id" ObjectId, in place, to every dict it is handed.
# Passing shallow copies keeps article_list itself free of ObjectIds, so the
# JSON export cell can still serialize it, and re-running this cell inserts a
# fresh set of documents instead of failing on duplicate "_id" values.
collect.insert_many([dict(article) for article in article_list])

<pymongo.results.InsertManyResult at 0x7f9d61305eb0>

In [14]:
# Verify existence of the database.
# "mars_news" should appear in the list of database names that is printed.
print(mongo.list_database_names())

['admin', 'config', 'local', 'mars_news']


In [15]:
# Confirm that the "mars_news" database now contains the expected collection
# by printing its collection names.
db = mongo.get_database("mars_news")

print(db.list_collection_names())

['article_list']


In [16]:
# Walk the whole "article_list" collection and print every stored document so
# the inserted articles can be checked against the scraped list.
collect = db.get_collection("article_list")

for result in collect.find():
    print(result)

{'_id': ObjectId('637d2c861270565e61b2d2c0'), 'title': "Robotic Toolkit Added to NASA's Mars 2020 Rover", 'preview': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "}
{'_id': ObjectId('637d2c861270565e61b2d2c1'), 'title': "NASA's New Mars Rover Is Ready for Space Lasers", 'preview': 'Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.'}
{'_id': ObjectId('637d2c861270565e61b2d2c2'), 'title': "NASA's Treasure Map for Water Ice on Mars", 'preview': 'A new study identifies frozen water just below the Martian surface, where astronauts could easily dig it up.'}
{'_id': ObjectId('637d2c861270565e61b2d2c3'), 'title': "NASA's Curiosity Keeps Rolling As Team Operates Rover From Home", 'preview': 'The team has learned to meet new challenges as they work remotely on the Mars mission.'}
{'_id': ObjectId('637d2c861270565e61b2d2c4')

In [17]:
# Delete the collection.
# Destructive cleanup: removes "article_list" from the database bound to db,
# then displays the remaining collection names as the cell output.
db.drop_collection("article_list")
db.list_collection_names()

[]

In [18]:
# Delete the database.
# Destructive cleanup: db is the handle obtained from mongo["mars_news"], so
# this drops that database; the remaining database names are then displayed
# as the cell output.
mongo.drop_database(db)
mongo.list_database_names()

['admin', 'config', 'local']