## Mission to Mars

In [1]:
import shutil
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Confirm chrome driver is available: `which` prints the location of the
# chromedriver executable found on the PATH (no output means it was not found).
# Splinter's Chrome driver setup is described at
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Create the headless Chrome Browser object used by every scraping cell below.
# The driver location is resolved from the PATH so the notebook is not tied to
# one machine's layout; if it is not on the PATH, fall back to the conventional
# install location.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=True)

### Scrape the NASA Mars news site for latest article

In [4]:
# Open the news page for NASA Mars site, use BeautifulSoup to parse the HTML

url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Reading browser.html immediately sometimes captures the wrong HTML
# (presumably the article list is filled in after the initial page load).
# Instead of sleeping for a fixed time, poll until the markup that the parsing
# step relies on (a "list_text" <div> inside the "grid_gallery" <section>) is
# present, and fail with a clear message if it never shows up.
article_selector = 'section.grid_gallery div.list_text'
if not browser.is_element_present_by_css(article_selector, wait_time=10):
    raise RuntimeError("NASA Mars news page did not render any articles within 10 seconds")

soup = bs(browser.html, 'html.parser')

In [5]:
# Page structure (found with the browser developer tools):
#   <section class="grid_gallery">      -> holds the list of news articles
#     <div class="list_text">           -> one per article
#       <div class="content_title">         article title
#       <div class="article_teaser_body">   article description
# Only the latest article is wanted and it is the first one in the list, so
# `find` (first match) is used at every level.

gallery = soup.find("section", class_="grid_gallery")
news = gallery.find("div", class_="list_text")

title_div = news.find('div', class_='content_title')
teaser_div = news.find('div', class_='article_teaser_body')

news_title = title_div.text
news_p = teaser_div.text
print("{} \n {}".format(news_title, news_p))

Six Things to Know About NASA's Opportunity Rover 
 Opportunity's mission is complete. Here are highlights from its time on Mars.


### JPL Mars Featured Image Scrape

In [6]:
# Load the JPL space-images search page filtered to the Mars category and
# parse the rendered HTML. The host is kept separate from the query because
# it is reused to build the absolute image URL.
jpl_url = "https://www.jpl.nasa.gov"
query = "/spaceimages/?search=&category=Mars"

browser.visit("{}{}".format(jpl_url, query))
jpl_soup = bs(browser.html, features='html.parser')


In [7]:
# The anchor with id "full_image" carries the image path in its
# data-fancybox-href attribute; prefix it with the JPL host to get a full URL.
full_image_link = jpl_soup.find('a', attrs={'id': 'full_image'})
featured_image_url = jpl_url + full_image_link['data-fancybox-href']
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19141_ip.jpg'

### Mars Weather Scrape

In [8]:
# Load the Mars weather report Twitter timeline and parse the rendered HTML.
tw_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(tw_url)
tw_soup = bs(browser.html, features='html.parser')

In [9]:
# Take the first tweet whose text starts with "Sol": that is the weather report.
# Using next() with a default means a missing report is detected explicitly
# instead of leaving `mars_weather` undefined (or stale from an earlier run).
mars_weather = next(
    (
        tweet.text
        for tweet in tw_soup.find_all('p', class_='tweet-text')
        if tweet.text.startswith("Sol")
    ),
    None,
)
if mars_weather is None:
    raise ValueError("No weather report tweet (text starting with 'Sol') found on the page")

# Remove extraneous text at the end of the weather text (the trailing
# 'pic.twitter...' link). partition() keeps the whole string when the marker is
# absent, whereas slicing with str.find() would use -1 and drop the last character.
mars_weather = mars_weather.partition('pic.twitter')[0]
mars_weather

'Sol 2319 (2019-02-13), high -17C/1F, low -72C/-97F, pressure at 8.12 hPa, daylight 06:46-18:52'

### Mars Facts Scrape

In [10]:
# Let pandas fetch the space-facts page and return every HTML table on it as
# a list of DataFrames.
facts_url = 'https://space-facts.com/mars/'
tables = pd.read_html(io=facts_url)

# Peek at the first table; the trailing semicolon suppresses the cell output.
tables[0].head(n=5);

In [11]:
# Format the table for bootstrap: render the first facts table to HTML without
# header or index, then adjust the markup with BeautifulSoup.

facts_html = tables[0].to_html(header=False, index=False)
table_soup = bs(facts_html, 'html.parser')
facts_table = table_soup.table

# Replace the table's class attribute with bootstrap classes and drop the
# border attribute.
facts_table['class'] = 'table table-sm table-bordered small'
del facts_table['border']

# Turn the first cell of every body row into a row header: <th scope="row">.
for row in facts_table.tbody.find_all('tr'):
    first_cell = row.td
    first_cell['scope'] = 'row'
    first_cell.name = 'th'

facts = str(table_soup)

### Mars Hemispheres Scrape

In [12]:
# Load the USGS Astrogeology search results for the enhanced Mars hemisphere
# images and parse the rendered HTML. Host and query are kept separate because
# the host is reused to build absolute image URLs.
hemi_url = "https://astrogeology.usgs.gov"
hemi_query = "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

browser.visit("{}{}".format(hemi_url, hemi_query))
hemi_soup = bs(browser.html, features='html.parser')

In [13]:
# One dict per search result: the text of the first link inside each
# "description" <div> is the hemisphere title.
hemi_map = [
    {'title': description.find('a').text}
    for description in hemi_soup.find_all('div', class_="description")
]

hemi_map

[{'title': 'Cerberus Hemisphere Enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced'}]

In [14]:
# For each hemisphere, open its detail page from the search results and store
# the absolute URL of its "wide-image" picture under the 'img_url' key.
for hmp in hemi_map :
    # Start from the results page every time: the click below navigates away from it.
    browser.visit(hemi_url+hemi_query)
    browser.click_link_by_partial_text(hmp['title'])

    link_soup = bs(browser.html, 'html.parser')
    # Use a dedicated name rather than `url`, so the NASA news `url` defined
    # earlier in the notebook is not overwritten with an image path.
    img_path = link_soup.find('img', class_='wide-image')['src']
    hmp['img_url'] = hemi_url+img_path
    print(img_path)

/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [15]:
# Display the finished list: each entry now has both 'title' and 'img_url'.
hemi_map

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]