# Mission to Mars
Webscraping NASA Web Sites

In [1]:
# Dependencies
# Beautiful Soup
from bs4 import BeautifulSoup as bs
# Splinter web browser opener
from splinter import Browser

# Standard dependencies
import os
import pandas as pd
import time
import requests

In [2]:
# Splinter for Windows.  Will open and run Google Chrose
executable_path = {'executable_path': 'C:\BIN\chromedriver.exe'}

#To use splinter a browser instance needs to be created
browser = Browser('chrome', **executable_path, headless=False) #headless means no GUI

# tutorial from splinter
# https://splinter.readthedocs.io/en/latest/tutorial.html
#This tutorial provides a simple example, teaching step by step how to:
#search for splinter - python acceptance testing for web applications' in google.com, and
#find if splinter official website is listed among the search results

# from splinter import Browser

# browser = Browser() # defaults to firefox
# browser.visit('http://google.com')
# browser.fill('q', 'splinter - python acceptance testing for web applications')
# browser.find_by_name('btnG').click()

# if browser.is_text_present('splinter.readthedocs.io'):
#     print "Yes, the official website was found!"
# else:
#     print "No, it wasn't found... We need to improve our SEO techniques"

# browser.quit()

# Scrape NASA News

In [3]:
#Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
#Assign the text to variables that you can reference later.
url = "https://mars.nasa.gov/news/"
browser.visit(url) #uses splinter

In [4]:
#using beautiful soup to write it into html
#assign html content
html = browser.html
soup = bs(html,"html.parser")

In [5]:
news_title = soup.find("div",class_="content_title").text
news_paragraph = soup.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"Para: {news_paragraph}")

Title: NASA Brings Mars Landing to Viewers Everywhere
Para: NASA's InSight lander is scheduled to touch down on the Red Planet at approximately noon PST  on Nov. 26, with a new suite of instruments to probe below the Martian surface.


# Scrape JPL Images 

In [6]:
#Scrape the JPL Mars Space Image Site. 
featured_image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=featured#submit"
browser.visit(featured_image_url)

# assign html content
html = browser.html
# Create a Beautiful Soup object
JPLContent = bs(html, "html5lib")

#Scrape Path for the Feature Image. Get the partial path of the url
image_address = JPLContent.find_all('a', class_='fancybox')[0].get('data-fancybox-href').strip()

#final combined url
Final_url = featured_image_url + image_address

print(Final_url)

https://www.jpl.nasa.gov/spaceimages/?search=&category=featured#submit/spaceimages/images/mediumsize/PIA18904_ip.jpg


# Scrape Mars Weather

In [65]:
#Visit the Mars Weather twitter account and scrape the latest Mars weather tweet.
#Save the tweet text for the weather report as a variable called mars_weather.
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

html_weather = browser.html
soup = bs(html_weather, "html.parser")
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)

Sol 2229 (2018-11-13), high -2C/28F, low -71C/-95F, pressure at 8.62 hPa, daylight 06:22-18:39


# Mars Facts

In [66]:
#Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including 
#Diameter, Mass, etc. Use Pandas to convert the data to a HTML table string.
url_facts = "https://space-facts.com/mars/"

#Read HTML tables into a list of DataFrame objects.
MarsFacts = pd.read_html(url_facts)

#pull the first entry in the list
df_mars_facts = MarsFacts[0]
df_mars_facts.columns = ["Description", "Values"]
df_mars_facts.set_index(["Description"])

Unnamed: 0_level_0,Values
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [69]:
#Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres.
#Click each of the links to the hemispheres in order to find the image url to the full resolution image.
#Save image url string for the full resolution hemisphere image, and the Hemisphere title. 
#Use a Python dictionary to store the data using the keys img_url and title.
#Append the dictionary with the image url string and the hemisphere title to a list. 
#This list will contain one dictionary for each hemisphere.

# hemisphere_image_urls = [
#     {"title": "Valles Marineris Hemisphere", "img_url": "..."},
#     {"title": "Cerberus Hemisphere", "img_url": "..."},
#     {"title": "Schiaparelli Hemisphere", "img_url": "..."},
#     {"title": "Syrtis Major Hemisphere", "img_url": "..."},

# assigned list to store:
# create empty dict
hemisphere_image_urls = []
dict = {}

# URL of page to be scraped
# assign html content
# Create Beautiful Soup object
url_images = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_images)
html = browser.html
soup = bs(html,"html5lib")

# Scrape all the titles of each hemisphere on the page
results = soup.find_all('h3')

# Loop through each hemisphere title found on the page
for result in results:
    
    # Get title text from result
    titles = result.text
    
    # Use splinter - click on the title text
    browser.click_link_by_partial_text(titles)
    
    # pull the pages html content, place into html for soup
    html = browser.html

    # Create a Beautiful Soup object for holding all the html content
    html_soup = bs(html,"html5lib")
    
    # Grab the image link
    links = html_soup.find_all('div', class_="downloads")[0].find_all('a')[0].get("href")
    
    # Pass title to Dict
    # Pass url to Dict
    dict["title"]=titles
    dict["img_url"]=links
    
    # Append the dictionay
    hemisphere_image_urls.append(dict)
    
    # Clean Up dictionary for other entries
    dict = {}
    
    # move back in the webpage and pull info for next titles
    browser.click_link_by_partial_text('Back')

# print image information
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]