# Mission to Mars
---
Web scraper for NASA's Mars Exploration Program website.
---
### Dependencies

In [1]:
from bs4 import BeautifulSoup
import requests
from splinter import Browser
from os.path import basename
import time

In [2]:
# Configure Splinter: drive a visible (non-headless) Chrome window through
# the chromedriver binary named below.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

---
### Latest News
- Grab titles and teaser text of all the latest news.

In [3]:
# Open browser with splinter for scraping
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# The article list appears to be rendered client-side after the initial page
# load, so wait (up to 10 s) for the first article container to show up before
# snapshotting the HTML. Without this the parse below can race the page and
# find no articles. The call returns early as soon as the element exists and
# simply returns False on timeout, so the cell still proceeds either way.
browser.is_element_present_by_css('div.image_and_description_container', wait_time=10)

# Snapshot the rendered DOM and hand it to BeautifulSoup for parsing
html = browser.html
mars_soup = BeautifulSoup(html, 'html.parser')


In [4]:
# Drill down from the main content container to the per-article containers;
# the result is one element per news item on the page.
news_hits = (
    mars_soup
    .find('div', class_='grid_list_page module content_page')
    .find_all('div', class_='image_and_description_container')
)


In [5]:
# Create lists for findings (parallel lists: index i of each belongs to the
# same article)
news_titles = []
news_teaser = []

# Go through each article and extract the title and preview summary text
for news in news_hits:
    title = news.find('div', class_='content_title').get_text()
    news_titles.append(title)
    tease = news.find('div', class_='article_teaser_body').get_text()
    news_teaser.append(tease)

# Check the count of items scraped and preview the last find
print(f'{len(news_titles)} titles found.\n{len(news_teaser)} teasers found.')

# Preview from the lists rather than the leaked loop variables: when no
# articles were found those names are either unbound (NameError on a fresh
# kernel) or stale values left over from an earlier run.
if news_titles:
    print(f'Last Scraped Result: "{news_titles[-1]}" \n    {news_teaser[-1]}')
else:
    print('No articles were scraped.')

40 titles found.
40 teasers found.
Last Scraped Result: "Drilling Success: Curiosity is Collecting Mars Rocks" 
    Engineers will now test delivering samples to instruments inside NASA's Curiosity Mars rover.


---
### JPL Mars Space Images
- Grab the full-size JPEG of the featured image
- Retain a copy of the URL to the featured image

In [6]:
# Send the browser to the JPL space-images listing filtered to Mars.
# base_url is kept separate because later cells prepend it to relative paths.
base_url = 'https://www.jpl.nasa.gov'
url = f'{base_url}/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
# Have splinter click the link whose text contains 'FULL IMAGE' so the page
# shows the full-size view of the featured image.
# NOTE(review): newer Splinter releases reportedly replace this helper with
# browser.links.find_by_partial_text(...).click() — confirm against the
# installed Splinter version before upgrading.
browser.click_link_by_partial_text('FULL IMAGE')

# Pause for a fixed 5 seconds to give the browser time to load before the
# next cell reads browser.html; the following scrape depends on this delay.
time.sleep(5)

In [8]:
# Scrape the URL of the image from the page as currently rendered in the browser
html = browser.html
jpl_soup = BeautifulSoup(html, 'html.parser')
feature_img_tag = jpl_soup.find('img', class_='fancybox-image')
if feature_img_tag is None:
    # Fail with an actionable message instead of an opaque AttributeError on None
    raise RuntimeError(
        'Featured image tag (img.fancybox-image) not found; '
        'the full-size viewer may not have finished loading.'
    )
feature_img_url = feature_img_tag.get('src')

# The src scraped here is a site-relative path, so prepend the site root and
# retain the absolute URL for later use instead of only printing it.
feature_img_full_url = base_url + feature_img_url
print(feature_img_full_url)

# Grab the image itself. Fetch and validate the response BEFORE opening the
# output file, so a failed request neither leaves an empty file behind nor
# saves an HTML error page under a .jpg name.
response = requests.get(feature_img_full_url, timeout=30)
response.raise_for_status()

# Despite its name, save_dir is the output *file name* (the last path segment
# of the image URL); the file is written to the current working directory.
save_dir = basename(feature_img_url)
with open(save_dir, 'wb') as dl:
    dl.write(response.content)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20057_ip.jpg


---
### Mars Weather
- Grab the latest Mars weather status tweet

---
### Mars Facts
- Scrape the Mars facts into a dataframe

---
### Mars Hemispheres
- Get hi-res images of each of Mars's hemispheres
- Grab the name and save the URL
- Store data as dict