In [1]:
#Step 1: Scraping Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
from selenium import webdriver
from selenium.webdriver.common.by import By

#Step 2: Database Dependencies
import pymongo

#Python Dependencies
import pandas as pd
import time


# STEP 1: Web Scraping

1. Use Splinter to create a browser object connected to the NASA Mars website
2. Use BeautifulSoup to parse HTML and retrieve information

In [24]:
#Path to the chromedriver binary, forwarded to the Chrome driver as a keyword argument
executable_path = {'executable_path': '/chromedriver'}

#Default Chrome behaviors: start maximized and suppress notification prompts
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--disable-notifications")

#Create browser object; the options object has to be handed to Browser,
#otherwise the arguments configured above are never applied
browser = Browser('chrome', **executable_path, options=options, headless=False)

### NASA Mars News

In [25]:
#Connect to NASA Mars webpage
browser.visit("https://mars.nasa.gov/news")

#Explicitly wait (up to 10 seconds) for the "li.slide" elements that are scraped
#from this page before its HTML is captured.
#NOTE(review): presumably the headline list is rendered after page load - confirm
browser.is_element_present_by_css("li.slide", wait_time=10)

In [26]:
#Snapshot the browser's current HTML and parse it with the lxml parser
html = browser.html
soup = BeautifulSoup(markup=html, features='lxml')

In [28]:
# Find all headline list items (find_all is the current name of the legacy findAll)
articles = soup.find_all("li", class_="slide")
titles = []
teasers = []

#Loop through headlines to scrape titles and teasers
for article in articles:
    title_tag = article.find('div', class_="content_title")
    teaser_tag = article.find('div', class_="article_teaser_body")

    #find() returns None when a slide lacks one of the two divs; skip such
    #slides so titles[i] and teasers[i] always describe the same article
    if title_tag is None or teaser_tag is None:
        continue

    titles.append(title_tag.text)
    teasers.append(teaser_tag.text)

['My Culture, My Voice', "NASA Readies Perseverance Mars Rover's Earthly Twin ", "NASA Engineers Checking InSight's Weather Sensors", "Follow NASA's Perseverance Rover in Real Time on Its Way to Mars", 'NASA Establishes Board to Initially Review Mars Sample Return Plans', "NASA's Ingenuity Mars Helicopter Recharges Its Batteries in Flight", "Celebrate Mars Reconnaissance Orbiter's Views From Above", "NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light", "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary", 'NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet', "NASA's Perseverance Rover Will Carry First Spacesuit Materials to Mars", "A New Video Captures the Science of NASA's Perseverance Mars Rover", 'NASA Invites Public to Share Excitement of Mars 2020 Perseverance Rover Launch', "NASA's Mars Perseverance Rover Passes Flight Readiness Review", 'NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities', "6 Things to Know Abou

### JPL Mars Space Images

In [6]:
#Point the browser at the JPL space images page, filtered to the Mars category
browser.visit(url="https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")

In [7]:
#Parse the landing page with the built-in html.parser.
#NOTE(review): this soup is rebuilt after the two clicks below before anything reads it
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [8]:
#Click the first element whose id is "full_image" (the featured image button)
browser.find_by_id('full_image')[0].click()

In [9]:
#Give the "more info" text up to one second to appear after the previous click;
#the return value is ignored, the call only serves as an explicit wait
browser.is_element_present_by_text("more info", wait_time=1)

#Follow the link whose text contains "more info"
pic = browser.links.find_by_partial_text("more info")
pic.click()

In [10]:
#Re-parse the page the browser is showing after the clicks above
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

#Locate the image inside the lede figure's link and read its src attribute
featured_img = soup.select_one("figure.lede a img")
img_url = featured_img.get("src")

#Prefix the scraped src with the JPL host to form the full image URL
feat_image_url = "https://www.jpl.nasa.gov" + img_url

### Mars Facts
1. Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
2. Use Pandas to convert the data to an HTML table string.

In [11]:
#Page that holds the Mars facts table
url = 'https://space-facts.com/mars'

#Let pandas read the HTML tables found at that url; the first one is used below
table = pd.read_html(io=url)

In [12]:
#Label the two columns of the first scraped table (this renames them on table[0] itself)
table[0].columns = ["Attribute", "Value"]

#Index the facts by their attribute name
mars_facts = table[0].set_index("Attribute")

In [13]:
#Render the facts DataFrame as an HTML table string carrying the "table" and
#"table-striped" CSS classes; mars_facts is rebound from a DataFrame to that string
mars_facts = mars_facts.to_html(classes=["table", "table-striped"])

### Mars Hemispheres

In [20]:
# Visit USGS webpage for Mars hemisphere images
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url=hemispheres_url)

# Pause five seconds before reading the page source
time.sleep(5)

# Capture the current HTML and parse it with Beautiful Soup
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")

In [21]:
# List that collects one {"title", "img_url"} dictionary per hemisphere
hemisphere_image_urls = []

#Identifies the number of links (hemispheres)
hemisphere_links = browser.find_by_css("a.product-item h3")

for i in range(len(hemisphere_links)):

    #Look the links up again on every pass instead of reusing hemisphere_links.
    #NOTE(review): presumably needed because the browser leaves and returns to
    #the results page each iteration - confirm
    browser.find_by_css("a.product-item h3")[i].click()

    #Reset soup object
    html = browser.html
    soup = BeautifulSoup(html, 'lxml')

    #Grab Hemisphere title; removing "Enhanced" leaves a trailing space, so strip it
    title = soup.select_one("h2.title").text
    title = title.replace("Enhanced", "").strip()

    #Grab img url: href of the first link inside the downloads container
    img_url = soup.select_one("div.downloads a")["href"]

    #Add hemisphere title and image url to the hemisphere list
    hemisphere_image_urls.append({"title": title, "img_url": img_url})

    #return to original landing page
    browser.back()

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


## Store all of the scraped data into a dictionary

In [31]:
#Bundle every scraped result under the key it will be stored with
mars_data = dict(
    titles=titles,
    teasers=teasers,
    featured_image_url=feat_image_url,
    mars_facts=mars_facts,
    hemisphere_image_urls=hemisphere_image_urls,
)

In [32]:
#Display the assembled dictionary so its contents can be checked before it is stored
mars_data

{'titles': ['My Culture, My Voice',
  "NASA Readies Perseverance Mars Rover's Earthly Twin ",
  "NASA Engineers Checking InSight's Weather Sensors",
  "Follow NASA's Perseverance Rover in Real Time on Its Way to Mars",
  'NASA Establishes Board to Initially Review Mars Sample Return Plans',
  "NASA's Ingenuity Mars Helicopter Recharges Its Batteries in Flight",
  "Celebrate Mars Reconnaissance Orbiter's Views From Above",
  "NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light",
  "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary",
  'NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet',
  "NASA's Perseverance Rover Will Carry First Spacesuit Materials to Mars",
  "A New Video Captures the Science of NASA's Perseverance Mars Rover",
  'NASA Invites Public to Share Excitement of Mars 2020 Perseverance Rover Launch',
  "NASA's Mars Perseverance Rover Passes Flight Readiness Review",
  'NASA to Broadcast Mars 2020 Perseverance Launch, Prelau

# Step 2: Create a MongoDB database to store Mars Info

In [33]:
#Create a connection to the MongoDB server at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [34]:
#Create a MongoDB database
db = client.mars_db

#Add collection
data_collection = db["mars"]

#Insert a shallow copy of the dictionary into mars_db: insert_one adds an "_id"
#key to the dict it receives, so passing mars_data itself would mutate it and
#make a re-run of this cell fail with DuplicateKeyError
data_collection.insert_one(dict(mars_data))

<pymongo.results.InsertOneResult at 0x7fdc3065e548>