# Mission to Mars - Web Scraping

In [77]:
# Dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
#from selenium import webdriver

## NASA Mars News Site 

In [62]:
# Scrape NASA Mars News
url_nasa = "https://mars.nasa.gov/news/"

# Retrieve page with WerbDriver, not request - complex page buildup
# Using firefox instead of chrome, chrome on my PC has limitation set by IT
executable_path = {'executable_path': 'geckodriver.exe'} # chromedriver
browser = Browser('firefox', **executable_path, headless=False)
browser.visit(url_nasa)

In [57]:
# Create BeautifulSoup object; parse with 'html.parser'
#soup = bs(response.text, 'html.parser')
soup = bs(browser.html, 'html.parser')

In [58]:
# Collect the latest News Title and Paragraph Text. 
# Assign the text to variables for later reference.
news_items = soup.find_all('div', class_='image_and_description_container')
for item in news_items: 
    news = item.find('div', class_='list_text')
    if news:
        n_date = news.find('div', class_='list_date').text
        n_title = news.find('a').text
        n_teaser = news.find('div', class_='article_teaser_body').text
        print(f"{n_date}\n{n_title}\n{n_teaser}")

September 12, 2019
NASA's Mars 2020 Comes Full Circle
Aiming to pinpoint the Martian vehicle's center of gravity, engineers took NASA's 2,300-pound Mars 2020 rover for a spin in the clean room at JPL. 
August 28, 2019
NASA Invites Students to Name Mars 2020 Rover
Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.
August 28, 2019
NASA's Mars Helicopter Attached to Mars 2020 Rover 
The helicopter will be first aircraft to perform flight tests on another planet.
August 23, 2019
What's Mars Solar Conjunction, and Why Does It Matter?
NASA spacecraft at Mars are going to be on their own for a few weeks when the Sun comes between Mars and Earth, interrupting communications.
August 23, 2019
Scientists Explore Outback as Testbed for Mars 
Australia provides a great place for NASA's Mars 2020 and the ESA-Roscosmos ExoMars scientists to hone techniques in preparation for searching for signs ancient life on Mars.
August 22, 2019
NASA-

## JPL Mars Space Images - Featured Image 
<a class="button fancybox" 
data-description="This artist's concept shows the NASA's WISE spacecraft, in its orbit around Earth. In September of 2013, 
engineers will attempt to bring the mission out of hibernation to hunt for more asteroids and comets in a project called NEOWISE." 
data-fancybox-group="images" 
data-fancybox-href="/spaceimages/images/mediumsize/PIA17254_ip.jpg" 
data-link="/spaceimages/details.php?id=PIA17254" 
data-title="NEOWISE: Back to Hunt More Asteroids (Artist Concept)" 
id="full_image">
FULL IMAGE
</a>

In [59]:
# Scrape site
url_jpl = "https://www.jpl.nasa.gov"
url_jpl_mars = url_jpl + "/spaceimages/?search=&category=Mars"
browser.visit(url_jpl_mars)

In [60]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = bs(browser.html, 'html.parser')

In [61]:
# Get link to the large featured picture
featured_pic = soup.find('a', id='full_image')
url_featured_pic = url_jpl + featured_pic['data-fancybox-href']

# Open picture in browser
browser.visit(url_featured_pic) 

## Mars Weather

In [63]:
# Scrape site
url_twit = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_twit)

In [64]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = bs(browser.html, 'html.parser')

In [75]:
# Get current weather
weather_container = soup.find('div', class_='stream')
weather_current = weather_container.find('p')
mars_weather = weather_current.text.strip()
mars_weather

'Weeeeee!  https://www.jpl.nasa.gov/spaceimages/details.php?id=PIA23461\xa0…pic.twitter.com/iuEqJjsQ4G'

## Mars Facts

In [83]:
# Scrape facts using pandas
url_mars_facts = "https://space-facts.com/mars/"
mars_facts_df = pd.read_html(url_mars_facts)[1] # data in second table
mars_facts_df.columns = ['Parameter', 'Value']
mars_facts_df

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


## Mars Hemispheres

In [113]:
# Scrape site
url_usgs = "https://astrogeology.usgs.gov"
url_usgs_mars = url_usgs + "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_usgs_mars)

In [114]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = bs(browser.html, 'html.parser')

hemisphere_image_urls = []

# Hemispheres container
# No need to trigger click, use url to navigate to the page with image info instead
hemi_container = soup.find('div', id='product-section')
hemi_items = hemi_container.find_all('div', class_='item')
for item in hemi_items:
    title = item.find('h3').text
    link = item.find('a')['href']
    browser.visit(url_usgs + link)
    soup = bs(browser.html, 'html.parser')
    downloads = soup.find('div', class_='downloads')
    url = downloads.find('a')['href']
    hemisphere_image_urls.append({'title': title, 'img_url' : url})

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [None]:
# Close browser
#browser.quit()