## Dependencies

In [1]:
import pymongo
import requests
import pandas as pd
import time

In [2]:
# Import BeautifulSoup
from bs4 import BeautifulSoup

In [3]:
# Import Splinter and set the chromedriver path
from splinter import Browser
def init_browser(executable_path='c:/bin/chromedriver', headless=False):
    """Launch a Splinter-controlled Chrome browser.

    Args:
        executable_path: Location of the chromedriver binary. The default is
            the path this notebook was originally written against; pass a
            different path when chromedriver lives elsewhere.
        headless: When True, ask Chrome to run without a visible window.

    Returns:
        The Splinter Browser instance. The caller is responsible for calling
        ``quit()`` on it when finished.
    """
    return Browser('chrome', executable_path=executable_path, headless=headless)

## Scrape NASA Mars News

In [4]:
# Visit the URL & scrape
def scrape_news():
    """Scrape the NASA Mars news page and print the first listed article.

    Opens a browser via ``init_browser()``, loads the news listing, and prints
    the title and teaser paragraph of the first ``list_text`` entry found.
    Nothing is returned.

    Raises:
        IndexError: if the page contains no ``list_text`` entries.
    """
    nasa_url = 'https://mars.nasa.gov/news/'
    browser = init_browser()
    try:
        browser.visit(nasa_url)
        # Pause before reading the HTML; presumably this gives the
        # script-rendered article list time to appear -- TODO confirm.
        time.sleep(2)
        soup = BeautifulSoup(browser.html, 'html.parser')
    finally:
        # Always close the browser, even when the visit fails, so a failed
        # scrape does not leave a Chrome process behind.
        browser.quit()

    result = soup.find_all('div', class_='list_text')[0]
    news_title = result.find('div', class_='content_title').text
    news_para = result.find('div', class_='article_teaser_body').text
    print(news_title)
    print(news_para)

In [5]:
# Validate the scraper: it prints the latest headline and its teaser text
scrape_news()

NASA Readies Perseverance Mars Rover's Earthly Twin 
Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


## Scrape JPL Mars Featured Image

In [6]:
def scrape_jpl():
    """Return the URL of the featured Mars image on the JPL space-images page.

    Opens a browser via ``init_browser()``, loads the Mars category of the
    space-images page, and reads the image path out of the inline ``style``
    attribute of the first ``<article>`` element.

    Returns:
        str: the JPL base URL joined with the extracted image path.

    Raises:
        TypeError: if the page has no ``<article>`` element.
        KeyError: if that element has no ``style`` attribute.
    """
    jpl_url = 'https://www.jpl.nasa.gov/'
    jpl_img_page = 'spaceimages/?search=&category=Mars'
    browser = init_browser()
    try:
        browser.visit(jpl_url + jpl_img_page)
        soup = BeautifulSoup(browser.html, 'html.parser')
    finally:
        # Always close the browser, even when the visit fails, so a failed
        # scrape does not leave a Chrome process behind.
        browser.quit()

    img_url = soup.find('article')['style']
    # Strip the CSS wrapper around the path. The [2:-1] slice presumably drops
    # the opening quote plus leading slash and the closing quote -- this
    # depends on the exact style format the site emits.
    img_url = img_url.replace('background-image: url(', "").replace(');', "")[2:-1]
    featured_img_url = jpl_url + img_url
    return featured_img_url

In [7]:
# Validate the scraper: the returned featured-image URL is shown as the cell output
scrape_jpl()

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18058-1920x1200.jpg'

## Scrape Mars Facts Table

In [8]:
# Page holding the Mars facts table
facts_url = 'https://space-facts.com/mars/'
# pd.read_html returns a list of DataFrames, one per HTML table it parses
table = pd.read_html(facts_url)

In [9]:
# Keep the first table parsed from the page and display it
facts_df = table[0]
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# View/validate the HTML rendering of the facts table; header=False and
# index=False omit the column-header row and the index column
print(facts_df.to_html(header=False, index=False))

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Scrape USGS Astrogeology Site for Hemisphere Images

In [11]:
def scrape_usgs():
    """Collect title and image link for each Mars hemisphere listed by USGS.

    Opens a browser via ``init_browser()``, loads the hemisphere search
    results, then visits each result's detail page and reads the first link
    in its ``downloads`` section.

    Returns:
        list[dict]: one dict per result item, with keys ``'title'`` (the
        result heading with ' Enhanced' removed) and ``'img_url'`` (the href
        of the first download link on the detail page).

    Raises:
        AttributeError: if an expected element (result list, description,
            downloads section, or link) is missing from a page.
    """
    usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser = init_browser()
    try:
        browser.visit(usgs_url)
        soup = BeautifulSoup(browser.html, 'html.parser')
        items = soup.find('div', class_='result-list').find_all('div', class_='item')

        hem_data = []

        for item in items:
            # The result links are site-relative, so prefix the host.
            hem_url = 'https://astrogeology.usgs.gov' + item.find('a')['href']
            hem_title = item.find('div', class_='description').find('a').find('h3').text
            hem_title = hem_title.replace(' Enhanced', '')
            browser.visit(hem_url)
            # Short pause before reading the detail page; presumably lets the
            # page finish rendering -- TODO confirm.
            time.sleep(1)
            hem_soup = BeautifulSoup(browser.html, 'html.parser')
            hem_info = hem_soup.find('div', class_='downloads')
            hem_dict = {
                'title': hem_title,
                'img_url': hem_info.find('li').find('a')['href']}
            hem_data.append(hem_dict)
        return hem_data
    finally:
        # The browser is reused for every detail page, so it is closed here,
        # once, whether the loop completes or a page fails to parse.
        browser.quit()

In [12]:
# Validate the scraper: the returned list of title/img_url dicts is shown as the cell output
scrape_usgs()

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]