In [1]:
#Import dependencies
import pandas as pd
from splinter import Browser
import time

## Mission to Mars

Build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page.

### NASA Mars latest news scrape

>- Collect the latest news article's title and paragraph text

In [17]:
# Tell Splinter where the chromedriver executable lives, then start a
# Chrome session with headless mode switched off; later cells drive `browser`.
executable_path = dict(executable_path='app_files/chromedriver.exe')
browser = Browser(
    'chrome',
    headless=False,
    **executable_path
)

# Pause before the first page visit
time.sleep(5)

In [3]:
# Address of the Mars news listing to scrape (adjacent literals are joined
# into a single URL string; they are split only for readability)
mars_news_url = (
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=40'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search='
    '&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)

# Point the Splinter browser at the news page, then pause before querying it
browser.visit(mars_news_url)
time.sleep(5)

In [4]:
# Locate the news slides and keep the first one; the teaser-text cell reuses
# `mars_latest_news_title_first_li`, so it stays a notebook-level name.
news_slides = browser.find_by_css('body .item_list .slide')
mars_latest_news_title_first_li = news_slides.first

# Within that first slide, read the value of the title link
title_links = mars_latest_news_title_first_li.find_by_css('.content_title a')
mars_latest_news_title = title_links.value
mars_latest_news_title

"NASA's MRO Completes 60,000 Trips Around Mars"

In [5]:
# Read the teaser text from the same first slide found for the title
teaser_body = mars_latest_news_title_first_li.find_by_css('.article_teaser_body')
mars_latest_news_ptext = teaser_body.value
mars_latest_news_ptext

'The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.'

### Current Featured Mars Image
>- Collect the current featured Mars image

In [6]:
# Space-images page filtered to the Mars category; send the browser there
mars_image_url = (
    'https://www.jpl.nasa.gov/spaceimages/'
    '?search=&category=Mars#submit'
)
browser.visit(mars_image_url)

# Wait for the page to finish loading before interacting with it
time.sleep(10)

In [7]:
# First click on the way to the full-resolution image: the element with
# id 'full_image'
full_image_button = browser.find_by_id('full_image').first
full_image_button.click()

# Pause before the next click
time.sleep(5)

In [8]:
# Next click to get further to full res image: follow the link whose text is
# 'more info' (plus trailing spaces).
# NOTE(review): the link-text literal ends with several spaces, presumably to
# match the page markup exactly; confirm against the page before trimming it.
browser.click_link_by_text('more info     ')
# Pause before the next cell reads the page
time.sleep(5)

In [9]:
# Find the link inside the `figure.lede` element and keep its href as the
# featured image URL
lede_link = browser.find_by_css('figure.lede a')
featured_image_url = lede_link['href']

In [10]:
# Display the stored featured image URL as this cell's output
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA09113_hires.jpg'

### Mars weather
>- Collect the latest Mars weather from the Mars weather Twitter account

In [11]:
# Media timeline of the Mars weather Twitter account; redirect the browser there
mars_weather_twitter_url = (
    'https://twitter.com/MarsWxReport/media'
    '?lang=en'
)
browser.visit(mars_weather_twitter_url)

# Wait for the page to finish loading
time.sleep(5)

In [12]:
# Take the first element matching the tweet-text selector and keep its text
# as the latest weather report
tweet_text_selector = '.TweetTextSize.TweetTextSize--normal.js-tweet-text.tweet-text'
latest_tweet = browser.find_by_css(tweet_text_selector).first
mars_weather = latest_tweet.text
mars_weather

'InSight sol 165 (2019-05-15) low -100.3ºC (-148.6ºF) high -18.2ºC (-0.7ºF)\nwinds from the SW at 4.6 m/s (10.4 mph) gusting to 13.7 m/s (30.6 mph)\npressure at 7.50 hPa'

### Mars Facts
>- General facts about the planet, including diameter, mass, etc., in table form

In [28]:
# Mars facts: pandas reads the tables found at the URL into a list of
# DataFrames; the first one is used as the facts table.
mars_facts_url = 'https://space-facts.com/mars/'
mars_raw_html = pd.read_html(mars_facts_url)

# Label the two columns and index by description. A method chain is used
# instead of inplace=True so the frame stored in `mars_raw_html` is not mutated
# and `mars_html` is always derived fresh from the raw table.
mars_html = (
    mars_raw_html[0]
    .rename(columns={0: "description", 1: "value"})
    .set_index("description")
)

# Render the labelled table as an HTML string
mars_html_cleaned = mars_html.to_html()

### Mars Hemispheres
>- High-resolution image for each of Mars' hemispheres, gathered from the USGS Astrogeology site.

In [14]:
# Search-results page listing the Mars hemisphere images; open it in the browser
hemispheres_url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)
browser.visit(hemispheres_url)

# Wait for the page to finish loading
time.sleep(5)

In [15]:
# List to hold one dictionary per hemisphere: {'title': ..., 'img_url': ...}
hemisphere_image_urls = []

# Four hemispheres so we can hard code our range.
# For each one: click through to its page, record the title and the first
# download link, then go back. Sleeps give the browser time to parse each page.
for link in range(4):
    # The result items are looked up again on every pass, presumably because
    # navigating away and back invalidates the previously found elements.
    hemis_links = browser.find_by_css('.results .item')
    hemis_links[link].find_by_css('a img').click()
    time.sleep(3)

    hemi_title = browser.find_by_css('section.block.metadata h2').value

    # str.strip('Enhanced') treats its argument as a set of characters and
    # removes any of them from both ends, which leaves a trailing space and can
    # eat leading/trailing letters of a title. Remove the exact trailing word
    # and the surrounding whitespace instead.
    hemi_title_clean = hemi_title.strip()
    if hemi_title_clean.endswith('Enhanced'):
        hemi_title_clean = hemi_title_clean[:-len('Enhanced')].rstrip()

    hemisphere_image_urls.append({
        'title': hemi_title_clean,
        'img_url': browser.find_by_css('.downloads ul li a')['href'],
    })

    # Return to the results page for the next hemisphere
    browser.back()
    time.sleep(3)

In [18]:
# Scraping is finished: quit the Splinter browser session
browser.quit()