In [1]:
# Dependencies
import os
import time
from urllib.parse import urljoin

import lxml
import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser

In [2]:
# Create function to initialize browser
def init_browser():
    """Start a headless Chrome session through splinter.

    Returns:
        The splinter ``Browser`` created for "chrome", pointed at the
        ``chromedriver.exe`` executable and run with ``headless=True``.
    """
    driver_options = {'executable_path': 'chromedriver.exe', 'headless': True}
    return Browser("chrome", **driver_options)

In [3]:
# Shared dictionary that the scrape_* functions below fill in with Mission to Mars info.
# Re-running this cell resets it to empty.
mars_info = {}

In [4]:
# Creates function to scrape Mars news
def scrape_mars_news():
    """Scrape the first news item from the NASA Mars news page.

    Stores the headline under ``mars_info["news_title"]`` and the stripped
    teaser text under ``mars_info["news_p"]``. Returns nothing.

    Raises:
        AttributeError: if the page snapshot has no ``li.slide`` element (or it
            lacks the expected ``h3`` / ``article_teaser_body`` children).
    """
    news_url = 'https://mars.nasa.gov/news/'
    slide_wait_seconds = 5

    browser = init_browser()
    try:
        browser.visit(news_url)
        # NOTE(review): the article list appears to be filled in by script after
        # page load; wait briefly for the first slide before snapshotting. Confirm.
        browser.is_element_present_by_css('li.slide', wait_time=slide_wait_seconds)
        html = browser.html
    finally:
        # Always release the Chrome/chromedriver process, even when the visit fails.
        browser.quit()

    soup = BeautifulSoup(html, 'html.parser')
    news_header = soup.find('li', class_='slide')

    # Extract both fields before touching mars_info so a parse failure cannot
    # leave a new title paired with a stale teaser.
    news_title = news_header.find('h3').text
    news_p = news_header.find('div', class_='article_teaser_body').text.strip()

    mars_info["news_title"] = news_title
    mars_info["news_p"] = news_p
    
    

In [5]:
# Run the news scraper, which writes "news_title" and "news_p" into mars_info,
# then display mars_info as the cell's output.
scrape_mars_news()
mars_info

{'news_title': 'Mars 2020 Perseverance Rover to Capture Sounds From the Red Planet',
 'news_p': 'Audio gathered by the mission may not sound quite the same on Mars as it would to our ears on Earth. A new interactive online experience lets you sample the difference.'}

In [6]:
# Creates function to pull featured Mars image from NASA
def scrape_mars_image():
    """Scrape the featured Mars image URL from the JPL space images gallery.

    Opens the Mars-filtered gallery, follows the link in the first
    ``SearchResultCard`` to its detail page, and stores the ``src`` of the image
    inside that page's ``BaseImagePlaceholder`` under
    ``mars_info["featured_image_url"]``. Returns nothing.

    Raises:
        AttributeError / TypeError: if either page snapshot lacks the expected
            elements (``find`` returns ``None``).
    """
    gallery_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

    browser = init_browser()
    try:
        # Step 1: find the detail-page link of the first image in the gallery.
        browser.visit(gallery_url)
        gallery_soup = BeautifulSoup(browser.html, 'html.parser')
        image_header = gallery_soup.find('div', class_='SearchResultCard').a
        image_url_full = 'https://www.jpl.nasa.gov' + image_header['href']

        # Step 2: open the detail page in the SAME browser session (a second
        # browser would orphan the first) and read the jpg URL.
        browser.visit(image_url_full)
        detail_soup = BeautifulSoup(browser.html, 'html.parser')
        featured_image_holder = detail_soup.find('div', class_='BaseImagePlaceholder')
        featured_image_url = featured_image_holder.find('img')['src']
    finally:
        # Always release the Chrome/chromedriver process, even on failure.
        browser.quit()

    mars_info["featured_image_url"] = featured_image_url


In [7]:
# Run the featured-image scraper, which writes "featured_image_url" into mars_info,
# then display mars_info as the cell's output.
scrape_mars_image()
mars_info

{'news_title': 'Mars 2020 Perseverance Rover to Capture Sounds From the Red Planet',
 'news_p': 'Audio gathered by the mission may not sound quite the same on Mars as it would to our ears on Earth. A new interactive online experience lets you sample the difference.',
 'featured_image_url': 'https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA24308.width-1024.jpg'}

In [8]:
# Creates function to pull Mars Facts
def scrape_mars_facts():
    """Scrape the first HTML table on the Mars facts page and store it as HTML.

    Reads every table at ``https://space-facts.com/mars/`` with
    ``pandas.read_html``, takes the first one, and stores its ``to_html()``
    rendering under ``mars_info["facts_html"]``. Returns nothing.

    Raises:
        ValueError: raised by ``pandas.read_html`` when the page has no tables.
    """
    facts_url = 'https://space-facts.com/mars/'

    # read_html returns one DataFrame per <table> found; only the first is used.
    df_facts = pd.read_html(facts_url)[0]

    # No reset_index() here: called without assignment it returns a new frame
    # that was thrown away, so it never affected the rendered table.
    mars_info["facts_html"] = df_facts.to_html()

In [9]:
# Run the facts scraper, which writes "facts_html" into mars_info,
# then display mars_info as the cell's output.
scrape_mars_facts()
mars_info

{'news_title': 'Mars 2020 Perseverance Rover to Capture Sounds From the Red Planet',
 'news_p': 'Audio gathered by the mission may not sound quite the same on Mars as it would to our ears on Earth. A new interactive online experience lets you sample the difference.',
 'featured_image_url': 'https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA24308.width-1024.jpg',
 'facts_html': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>

In [10]:
# Creates function to pull Mars hemispheres data
hemisphere_image_urls = []

def scrape_mars_hemispheres():
    """Scrape title and full-image URL for each Mars hemisphere search result.

    Visits the USGS Astrogeology search results page, follows the first link in
    every ``div.item`` result, and reads the ``img.wide-image`` source and the
    ``h2.title`` text (with " Enhanced" removed) from each detail page.

    The module-level ``hemisphere_image_urls`` list is overwritten in place with
    one ``{"title": ..., "img_url": ...}`` dict per result, so calling this
    function again replaces the entries instead of duplicating them, and
    existing references to the list stay valid. Returns nothing.

    Raises:
        AttributeError / TypeError: if a page snapshot lacks the expected
            elements (``find`` returns ``None``).
    """
    base_url = 'https://astrogeology.usgs.gov/'
    hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    scraped = []

    browser = init_browser()
    try:
        browser.visit(hemis_url)
        index_soup = BeautifulSoup(browser.html, 'html.parser')

        for result in index_soup.find_all('div', class_='item'):
            # urljoin avoids the "//" that plain concatenation produces when
            # the href already starts with "/".
            hemi_link = urljoin(base_url, result.find('a')['href'])

            browser.visit(hemi_link)
            detail_soup = BeautifulSoup(browser.html, 'html.parser')

            full_image = detail_soup.find('img', class_='wide-image')['src']
            img_url = urljoin(base_url, full_image)

            title_raw = detail_soup.find('h2', class_='title').text.strip()
            title = title_raw.replace(" Enhanced", "")

            scraped.append({"title": title, "img_url": img_url})
    finally:
        # Always release the Chrome/chromedriver process, even on failure.
        browser.quit()

    # Publish only after every page parsed successfully; slice assignment
    # mutates the shared list rather than rebinding the name.
    hemisphere_image_urls[:] = scraped

In [11]:
# Run the hemisphere scraper (it populates the module-level hemisphere_image_urls list),
# then attach that list to mars_info under "hemisphere_image_urls".
scrape_mars_hemispheres()
mars_info["hemisphere_image_urls"] = hemisphere_image_urls

In [12]:
# Display the full mars_info dictionary as the cell's output.
mars_info

{'news_title': 'Mars 2020 Perseverance Rover to Capture Sounds From the Red Planet',
 'news_p': 'Audio gathered by the mission may not sound quite the same on Mars as it would to our ears on Earth. A new interactive online experience lets you sample the difference.',
 'featured_image_url': 'https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA24308.width-1024.jpg',
 'facts_html': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>