In [None]:
# ****************************************************************************
# Author: Aline Jaimes
# Date: 10162019
# Title: Web Scraping Homework - Mission to Mars
# Goal: Build a web application that scrapes various websites for data related 
# to the Mission to Mars and displays the information in a single HTML page. 
# ****************************************************************************
# **********************    INSTRUCTIONS    **********************************
# ## Step 1 - Scraping
# ## a. Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/)
# ##    and collect the latest News Title and Paragraph Text. 
# ##    Assign the text to variables that you can reference later.
###     JPL Mars Space Images - Featured Image
###```python
#### Example:
###          news_title = "NASA's Next Mars Mission to Investigate Interior of Red Planet"
###          news_p = "Preparation of NASA's next spacecraft to Mars, InSight, has ramped up this summer, on course for launch next May from Vandenberg Air Force Base in central California -- the first interplanetary launch in history from America's West Coast."
###   ```

# ## b. Visit the url for JPL Featured Space Image 
# ##    (https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
# ##    Use splinter to navigate the site and find the image url
# ##    for the current Featured Mars Image and assign the url string
# ##    to a variable called `featured_image_url`.
# ##    Make sure to find the image url to the full size `.jpg` image.
# ##    Make sure to save a complete url string for this image.
###```python
#### Example:
####         featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'
###```

# ## c. Mars Facts. Visit the Mars Facts webpage 
# ##    (https://space-facts.com/mars/) 
# ##    and use Pandas to scrape the table containing facts about 
# ##    the planet including Diameter, Mass, etc.
# ##    Use Pandas to convert the data to a HTML table string.

# ## d. Mars Hemispheres. Visit the USGS Astrogeology site 
# ##    (https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) 
# ##    to obtain high resolution images for each of Mar's hemispheres.
# ##    You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
# ##    Save both the image url string for the full resolution hemisphere image, 
# ##    and the Hemisphere title containing the hemisphere name. 
# ##    Use a Python dictionary to store the data using the keys `img_url` and `title`.
###```python
#### Example:
#### hemisphere_image_urls = [
####     {"title": "Valles Marineris Hemisphere", "img_url": "..."},
####     {"title": "Cerberus Hemisphere", "img_url": "..."},
####     {"title": "Schiaparelli Hemisphere", "img_url": "..."},
####     {"title": "Syrtis Major Hemisphere", "img_url": "..."},
#### ]
#### ```


In [16]:
#Imports & Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import tweepy
import pandas as pd
import time
import re
from selenium import webdriver

In [17]:
#Site Navigation
executable_path = {"executable_path": "C:/Program Files (x86)/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
html = browser.html
soup = bs4(html, 'html.parser')

NameError: name 'bs' is not defined

In [None]:
# Defining scrape & dictionary
def scrape():
    final_data = {}
    output = marsNews()
    final_data["mars_news"] = output[0]
    final_data["mars_paragraph"] = output[1]
    final_data["mars_image"] = marsImage()
    final_data["mars_weather"] = marsWeather()
    final_data["mars_facts"] = marsFacts()
    final_data["mars_hemisphere"] = marsHem()

    return final_data

In [None]:
# # NASA Mars News

def marsNews():
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    article = soup.find("div", class_='list_text')
    news_title = article.find("div", class_="content_title").text
    news_p = article.find("div", class_ ="article_teaser_body").text
    output = [news_title, news_p]
    return output

In [None]:
# # JPL Mars Space Images - Featured Image
def marsImage():
    image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(image_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    image = soup.find("img", class_="thumb")["src"]
    featured_image_url = "https://www.jpl.nasa.gov" + image
    return featured_image_url

In [None]:
# # Mars Weather
def marsWeather():
    
    import tweepy
    # Twitter API Keys
    def get_file_contents(filename):
        try:
            with open(filename, 'r') as f:
                return f.read().strip()
        except FileNotFoundError:
            print("'%s' file not found" % filename)

    consumer_key = get_file_contents('consumer_key')
    consumer_secret = get_file_contents('consumer_secret')
    access_token = get_file_contents('access_token')
    access_token_secret = get_file_contents('access_token_secret')

In [None]:
# Setup Tweepy API Authentication
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

    target_user = "MarsWxReport"
    tweet = api.user_timeline(target_user, count =1)
    mars_weather = ((tweet)[0]['text'])
    return mars_weather


In [None]:
# # Mars Facts
def marsFacts():
    import pandas as pd
    facts_url = "https://space-facts.com/mars/"
    browser.visit(facts_url)
    mars_data = pd.read_html(facts_url)
    mars_data = pd.DataFrame(mars_data[0])
    mars_data.columns = ["Description", "Value"]
    mars_data = mars_data.set_index("Description")
    mars_facts = mars_data.to_html(index = True, header =True)
    return mars_facts


In [None]:
# # Mars Hemispheres
def marsHem():
    import time 
    hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(hemispheres_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    mars_hemisphere = []

    products = soup.find("div", class_ = "result-list" )
    hemispheres = products.find_all("div", class_="item")

    for hemisphere in hemispheres:
        title = hemisphere.find("h3").text
        title = title.replace("Enhanced", "")
        end_link = hemisphere.find("a")["href"]
        image_link = "https://astrogeology.usgs.gov/" + end_link    
        browser.visit(image_link)
        html = browser.html
        soup=BeautifulSoup(html, "html.parser")
        downloads = soup.find("div", class_="downloads")
        image_url = downloads.find("a")["href"]
        dictionary = {"title": title, "img_url": image_url}
        mars_hemisphere.append(dictionary)
    return mars_hemisphere

In [6]:
news_url = "https://mars.nasa.gov/news/"
browser.visit(news_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [7]:
article = soup.find("div", class_='list_text')
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_ ="article_teaser_body").text
print(news_title)
print(news_p)

AttributeError: 'NoneType' object has no attribute 'find'

In [8]:
image_url = "https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg"
browser.visit(image_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [9]:
image = soup.find("img", class_="thumb")["src"]
featured_image_url = "https://www.jpl.nasa.gov" + image
print(featured_image_url)

TypeError: 'NoneType' object is not subscriptable

In [10]:
# Twitter API Keys
def get_file_contents(filename):
    try:
        with open(filename, 'r') as f:
            return f.read().strip()
    except FileNotFoundError:
        print("'%s' file not found" % filename)

consumer_key = get_file_contents('consumer_key')
consumer_secret = get_file_contents('consumer_secret')
access_token = get_file_contents('access_token')
access_token_secret = get_file_contents('access_token_secret')

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

'consumer_key' file not found
'consumer_secret' file not found
'access_token' file not found
'access_token_secret' file not found


In [11]:
target_user = "MarsWxReport"
tweet = api.user_timeline(target_user, count =1)
mars_weather = ((tweet)[0]['text'])
print(mars_weather)

TweepError: Failed to send request: Only unicode objects are escapable. Got None of type <class 'NoneType'>.

In [12]:
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)
mars_data = pd.read_html(facts_url)
mars_data = pd.DataFrame(mars_data[0])
mars_facts = mars_data.to_html(header = False, index = False)
print(mars_facts)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Diameter:</td>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <td>Distance from Sun:</td>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <td>Length of Year:</td>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <td>Temperature:</td>
      <td>-153 to 20 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


In [13]:
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")
mars_hemisphere = []

products = soup.find("div", class_ = "result-list" )
hemispheres = products.find_all("div", class_="item")

for hemisphere in hemispheres:
    title = hemisphere.find("h3").text
    title = title.replace("Enhanced", "")
    end_link = hemisphere.find("a")["href"]
    image_link = "https://astrogeology.usgs.gov/" + end_link    
    browser.visit(image_link)
    html = browser.html
    soup=BeautifulSoup(html, "html.parser")
    downloads = soup.find("div", class_="downloads")
    image_url = downloads.find("a")["href"]
    mars_hemisphere.append({"title": title, "img_url": image_url})

In [14]:
mars_hemisphere

[{'title': 'Cerberus Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]