## Stefanie Huckleberry's Mission to Mars

In [69]:
#Dependencies

def dependencies():
    
    from splinter import Browser
    from bs4 import BeautifulSoup
    import requests
    import pymongo
    import tweepy
    import json
    import pandas as pd
    from config import consumer_key, consumer_secret, access_token, access_token_secret
    from collections import OrderedDict

In [70]:
#Function to set up the path to the chromedriver.exe for splinter to use

def init_browser():
    executable_path = {"executable_path": "/Users/stefa/chromedriver.exe"}
    return Browser("chrome", **executable_path, headless=False)


## Scrape Nasa for Mars Headlines and Text

In [75]:
#Function to scrape the Nasa website for the latest news headline

def scrape_mars_headline():
    browser = init_browser()
      
    # visit https://mars.nasa.gov/news/
    mars_news = "https://mars.nasa.gov/news/"
    browser.visit(mars_news)
    
    #store the html in a variable called html    
    html = browser.html

    # create a soup object from the html.  This will parse the html we pulled from Nasa website.
    soup = BeautifulSoup(html, "html.parser")
   

    #Get the latest article posted on the site.  The list_text class has the headline,
    # date, and a blurb about the article - "a teaser"
    
    mars_article = soup.find('div', class_='list_text')

    #Now that we have the article, we can get the headline, blurb, and date
    mars_headline = mars_article.find('div', class_='content_title').text
    mars_teaser = mars_article.find('div', class_='article_teaser_body').text
    mars_news_date = mars_article.find('div', class_='list_date').text
    
    print(f"Latest Headline: {mars_headline}")
    print(f"Teaser: {mars_teaser}")
    print(f"Date: {mars_news_date}")


    return mars_headline, mars_teaser, mars_news_date       
  

# JPL Mars Space Images - Featured Image

In [81]:
#Function to scrape the JPL website for an image of Mars

def scrape_JPL_image():
    
    browser = init_browser()
      
    # visit https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
    mars_jpl = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(mars_jpl)
    
    #store the html in a variable called html    
    html = browser.html

    # create a soup object from the html.  This will parse the html we pulled from Nasa website.
    soup = BeautifulSoup(html, "html.parser")
    
    #Get the URL for the featured image-full size
     
    full_size_url = soup.find('a', class_='button fancybox').get('data-fancybox-href')
    
    
    #this returns a link to the jpeg, but the front end https://www.jpl.nasa.gov piece is missing
    
    #append the front end of the website onto the url
    mars_image_url = "https://www.jpl.nasa.gov" + full_size_url

    print(f"Mars Featured Image URL from JPL site: {mars_image_url}")

    return mars_image_url
    


## Mars Weather Tweet

In [79]:
def mars_weather_tweet():
    
    # Setup Tweepy API Authentication
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

    # Target User
    target_user = "MarsWxReport"

    mars_weather_tweet = api.user_timeline(target_user, count=1)[0]

    mars_weather = mars_weather_tweet.get('text')
    
    print(f"Latest Weather Tweet: {mars_weather}")
    
    return mars_weather



## Mars Facts

In [82]:
#scrape the space-facts website to get basic data on the planet Mars

def mars_facts():

    #store the URL for the space facts website
    facts_url = "https://space-facts.com/mars/"

    #Use pandas to scrape the page for table data
    tables = pd.read_html(facts_url)
    tables

    facts_df = tables[0]
    facts_df.columns = ['Fact Type', 'Fact Data']
    facts_df
    
    #Convert the dataframe to an HTML table string

    facts_html_table = facts_df.to_html(header=False, index=False)


    #strip the \n characters
    facts_html_table = facts_html_table.replace('\n', '')
    
    return facts_html_table



## Mars Hemispheres

In [61]:
#scrape the entire 'collapsible results' class so that we can loop through each item

#set the browser variable
browser = init_browser()

# visit https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars
mars_usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(mars_usgs_url)

#store the html in a variable called usgs_html    
usgs_html = browser.html

# create a soup object from the html.  This will parse the html we pulled from USGS website.
usgs_soup = BeautifulSoup(usgs_html, "html.parser")

#Get the URL for the featured image-full size

product_box = usgs_soup.find('div', class_='collapsible results')


In [62]:
#create a list to hold the links to each hemisphere's page

hemi_links = []

for item in product_box.find_all('div', class_='item'):
    hemi_links.append(item.find('a').get('href'))
    
hemi_links

#beginning of url to append
link_beg = "https://astrogeology.usgs.gov"

#create a new list to store the entire url string

hemi_urls = []

for link in hemi_links:
    link = link_beg + link
    hemi_urls.append(link)
    
hemi_urls

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [60]:
#visit each hemisphere's links using Splinter and get the images


#Create an empty list to store the dictionaries for all hemispheres
hemisphere_image_urls = []

#Create an empty list to store the title and image link for each hemisphere
hemi_dict = {}


for url in hemi_urls:
    browser.visit(url)
    hemi_html = browser.html

    # create a soup object from the html. 
    hemi_soup = BeautifulSoup(hemi_html, "html.parser")
    
    #store the title
    title = hemi_soup.find('h2', class_='title').text
    title = title.replace(' Enhanced', '')
     
    #go to the downloads section to get the list of images and pick the full image
    hemi_download = hemi_soup.find('div', class_='downloads')
    hemi_list = hemi_download.find('li')
    hemi_image = hemi_list.a['href']
    
    hemi_dict['title'] = title
    
    hemi_dict['img_url'] = hemi_image
    
    hemisphere_image_urls.append(hemi_dict)
    
print(hemisphere_image_urls) 




[{'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


# Main 

In [83]:
#this is the main procedure

def scrape():
    #Import dependencies
    dependencies()
    
    #Create the dictionary that will store all of the Mars information we scrape
    mars_data = {}
    
    #scrape Nasa data
    headline, teaser, date = scrape_mars_headline()

    mars_data["nasa_headline"] = headline
    mars_data["nasa_teaser"] = teaser
    mars_data["nasa_date"] = date
    
    #scape JPL for featured image
    
    featured_image = scrape_JPL_image()
    mars_data["featured_image"] = featured_image
    
    #scrape Twitter for the latest Mars weather tweet
    
    mars_weather = mars_weather_tweet()
    mars_data["weather"] = mars_weather

   
    #scrape Mars Facts website to store basic facts on the planet
    
    facts_html_table = mars_facts()
    mars_data["facts_table"] = facts_html_table
    
    print(mars_data) 
    
    
    
scrape()
    
    

Latest Headline: NASA to Host Briefing on November Mars InSight Landing
Teaser: A briefing on NASA's upcoming InSight Mars landing will air on Wed. Oct. 31 at 1:30 p.m. EDT (10:30 a.m. PDT) on NASA TV, the agency's website and NASA InSight Facebook Page.
Date: October 25, 2018
Mars Featured Image URL from JPL site: https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18182_ip.jpg
Latest Weather Tweet: Sol 2208 (2018-10-22), high -18C/0F, low -75C/-102F, pressure at 8.80 hPa, daylight 06:08-18:26
{'nasa_headline': 'NASA to Host Briefing on November Mars InSight Landing', 'nasa_teaser': "A briefing on NASA's upcoming InSight Mars landing will air on Wed. Oct. 31 at 1:30 p.m. EDT (10:30 a.m. PDT) on NASA TV, the agency's website and NASA InSight Facebook Page.", 'nasa_date': 'October 25, 2018', 'featured_image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18182_ip.jpg', 'weather': 'Sol 2208 (2018-10-22), high -18C/0F, low -75C/-102F, pressure at 8.80 hPa, daylight 06:0