In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
from pymongo import MongoClient
import pymongo

In [2]:
# to set the executable path
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Visit the Mars NASA news site/instruct the browser to visit it.
url = 'https://redplanetscience.com/'
browser.visit(url)

# Optional delay for loading the page
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [4]:
# set up the HTML parser
html = browser.html
news_soup = soup(html, 'html.parser')
slide_elem = news_soup.select_one('div.list_text')

# slide_elem: This result is the parent element of each article, which means that it holds all the other elements within it. 
# Later, reference this when want to filter our search results further.

In [5]:
# 2. scrape the title and preview text, or summary text, of each article on the landing page.
articles = news_soup.find_all('div', class_='list_text')
for article in articles:
    news_t = article.find('div', class_ = 'content_title').text
    print("Title:", news_t)
    news_p = article.find('div', class_ = 'article_teaser_body').text
    print("Preview:", news_p)
    print('\n')

# print('\n') : create the space between. If you need more space put - print('\n\n') or print('\n\n\n')

Title: With Mars Methane Mystery Unsolved, Curiosity Serves Scientists a New One: Oxygen
Preview: For the first time in the history of space exploration, scientists have measured the seasonal changes in the gases that fill the air directly above the surface of Gale Crater on Mars. 


Title: NASA's Mars 2020 Rover Closer to Getting Its Name
Preview: 155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.


Title: Mars Helicopter Attached to NASA's Perseverance Rover
Preview: The team also fueled the rover's sky crane to get ready for this summer's history-making launch.


Title: The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover
Preview: Two astronauts collected Moon rocks on Apollo 11. It will take three robotic systems working together to gather up the first Mars rock samples for return to Earth.


Title: NASA's Mars 2020 Rover Completes Its First 

In [6]:
# 3.Store the scraping results in Python data structures
# A. Store each title-and-preview pair in a Python dictionary
mars_news = {}
articles = news_soup.find_all('div', class_='list_text')
for article in articles:
    news_t = article.find('div', class_ = 'content_title').text
    news_p = article.find('div', class_ = 'article_teaser_body').text
    mars_news['title'] = news_t
    mars_news['preview'] = news_p
    print(mars_news)
    
# need to write the code - mars_news['title'] = news_t and mars_news['preview'] = news_p in separate line
# and put print(mars_news) in the same cell

{'title': 'With Mars Methane Mystery Unsolved, Curiosity Serves Scientists a New One: Oxygen', 'preview': 'For the first time in the history of space exploration, scientists have measured the seasonal changes in the gases that fill the air directly above the surface of Gale Crater on Mars. '}
{'title': "NASA's Mars 2020 Rover Closer to Getting Its Name", 'preview': "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."}
{'title': "Mars Helicopter Attached to NASA's Perseverance Rover", 'preview': "The team also fueled the rover's sky crane to get ready for this summer's history-making launch."}
{'title': "The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover", 'preview': 'Two astronauts collected Moon rocks on Apollo 11. It will take three robotic systems working together to gather up the first Mars rock samples for return to Earth.'}
{'title': "NAS

In [7]:
# 3.B -Store all the dictionaries in a Python list.
mars_list = []
articles = news_soup.find_all('div', class_='list_text')
for article in articles:
    news_t = article.find('div', class_ = 'content_title').text
    news_p = article.find('div', class_ = 'article_teaser_body').text
    mars_news['title'] = news_t
    mars_news['preview'] = news_p
    mars_list = list(mars_news.items())
    print(mars_list)

[('title', 'With Mars Methane Mystery Unsolved, Curiosity Serves Scientists a New One: Oxygen'), ('preview', 'For the first time in the history of space exploration, scientists have measured the seasonal changes in the gases that fill the air directly above the surface of Gale Crater on Mars. ')]
[('title', "NASA's Mars 2020 Rover Closer to Getting Its Name"), ('preview', "155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.")]
[('title', "Mars Helicopter Attached to NASA's Perseverance Rover"), ('preview', "The team also fueled the rover's sky crane to get ready for this summer's history-making launch.")]
[('title', "The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover"), ('preview', 'Two astronauts collected Moon rocks on Apollo 11. It will take three robotic systems working together to gather up the first Mars rock samples for return to Earth.')

In [9]:
# store the scraped data in a file or database (to ease sharing the data with others). 
# To do so, export the scraped data to either a JSON file or a MongoDB database.

# create a connection to Mongo. create a MongoClient instance, which establishes a connection with Mongo through port 27017
mongo = pymongo.MongoClient("mongodb://localhost:27017/")

# Rememter: 
# Make sure to have started Mongo in the terminal running the command "mongod".
# the hierarchy in Mongo is database → collection → document

In [10]:
print(mongo.list_database_names())

# identify which databases already exist in Mongo. These are databases that come installed with Mongo.

['admin', 'config', 'local']


In [None]:
#mongo.drop_database('my_db')
#mongo.list_database_names()

In [11]:
mars_news_db = mongo["my_db"]
print(mongo.list_database_names())

# To create a new database. assign the name of the new database = my_db, variable = mars_news_db. 

['admin', 'config', 'local']


In [12]:
my_first_collection = mars_news_db['news_collection']
mars_news_db.list_collection_names()

# creates a collection called my_first_collection and assigns it to the variable news_collection.
# the collection is empty until creating the document.

[]

In [13]:
mars = {'title': "Meet the People Behind NASA's Perseverance Rover", 'preview': "These are the scientists and engineers who built NASA's next Mars rover and who will guide it to a safe landing in Jezero Crater. "}
{'title': "NASA Readies Perseverance Mars Rover's Earthly Twin ", 'preview': "Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape."}
{'title': 'MAVEN Maps Electric Currents around Mars that are Fundamental to Atmospheric Loss', 'preview': 'Five years after NASA’s MAVEN spacecraft entered into orbit around Mars, data from the mission has led to the creation of a map of electric current systems in the Martian atmosphere.'}
{'title': "NASA's Mars Perseverance Rover Gets Its Sample Handling System", 'preview': 'The system will be collecting and storing Martian rock and soil. Its installation marks another milestone in the march toward the July launch period.'}
{'title': 'NASA Establishes Board to Initially Review Mars Sample Return Plans', 'preview': 'The board will assist with analysis of current plans and goals for one of the most difficult missions humanity has ever undertaken.'}
{'title': 'NASA to Reveal Name of Its Next Mars Rover', 'preview': "After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. "}
{'title': "NASA's Mars 2020 Rover Tests Descent-Stage Separation", 'preview': "A crane lifts the rocket-powered descent stage away from NASA's Mars 2020 rover after technicians tested the pyrotechnic charges that separate the two spacecraft."}
{'title': 'NASA Moves Forward With Campaign to Return Mars Samples to Earth', 'preview': 'During this next phase, the program will mature critical technologies and make critical design decisions as well as assess industry partnerships.'}
{'title': 'Space History Is Made in This NASA Robot Factory', 'preview': "From rockets to rovers, JPL's Spacecraft Assembly Facility has been at the center of robotic spaceflight. Here's a closer look at what makes it so special."}
{'title': 'NASA Administrator Statement on Moon to Mars Initiative, FY 2021 Budget', 'preview': "Jim Bridenstine addresses NASA's ambitious plans for the coming years, including Mars Sample Return."}
{'title': "While Stargazing on Mars, NASA's Curiosity Rover Spots Earth and Venus", 'preview': "This new portrait of the Red Planet's neighbors was taken during a time when there's more dust in the air on Mars."}
{'title': "NASA's Perseverance Rover Goes Through Trials by Fire, Ice, Light and Sound", 'preview': "The agency's new Mars rover is put through a series of tests in vacuum chambers, acoustic chambers and more to get ready for the Red Planet."}
{'title': "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary", 'preview': 'The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.'}
{'title': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface", 'preview': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.'}
{'title': "6 Things to Know About NASA's Ingenuity Mars Helicopter", 'preview': 'The first helicopter attempting to fly on another planet is a marvel of engineering. Get up to speed with these key facts about its plans.'}

my_first_collection.insert_many([mars])

# my_first_collection.insert_many(mars)- TypeError: documents must be a non-empty list so changed to "([mars])" instead of "_many" - by stackflow

<pymongo.results.InsertManyResult at 0x253d271cdc8>

In [14]:
results = my_first_collection.find()
for result in results:
    print(result)

{'_id': ObjectId('637ee37cdf728f20adc1cc20'), 'title': "Meet the People Behind NASA's Perseverance Rover", 'preview': "These are the scientists and engineers who built NASA's next Mars rover and who will guide it to a safe landing in Jezero Crater. "}


In [15]:
browser.quit()

In [None]:
## Note1: to delete the record 

# query = {'key ': 'value'}
# my_first_collection.delete_one(query)
# [r for r in my_first_collection.find()]

In [None]:
## Note2: to drop the collection 
## Note that the argument for the drop_collection method is my_collection (the name of the collection registered by Mongo) 
## and not my_first_collection, which is the variable used in Python to represent the collection.

# mars_news.drop_collection('my_collection')
# my_first_db.list_collection_names()

In [None]:
## Note3: to drop the database 
## use the name of the database registered with Mongo, my_db, and not the variable used in the Jupyter notebook, 
## my_first_db, to indicate which database to drop

# mongo.drop_database('my_db')
# mongo.list_database_names()