# Mission to Mars Web Scraping
### Joey Ashcroft 4/12/2019
- build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page

In [1]:
#import modules
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import time

In [2]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

In [3]:
# Retrieve page with the requests module
response = requests.get(url)

In [4]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

## Step 1: Scraping

### Nasa Mars News

In [5]:
#Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text.
#Assign the text to variables that you can reference later.

#title of first article
title = soup.find('div', class_="content_title").a.text
#clean indents
title = title.replace('\n','')

#paragraph explanation
paragraph_text = soup.find('div', class_="image_and_description_container").find('div',class_='rollover_description_inner').text
#clean indents
paragraph_text = paragraph_text.replace('\n', '')

#output string values
print(f"news title = {title}")
print(f"news_p= {paragraph_text}")

news title = NASA Garners 7 Webby Award Nominations
news_p= Nominees include four JPL projects: the solar system and climate websites, InSight social media, and a 360-degree Earth video. Public voting closes April 18, 2019.


### JPL Mars Space Images - Featured Image

In [6]:
#JPL Mars Space Images - Featured Image

#execute chromedriver
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [7]:
browser.visit('https://www.jpl.nasa.gov/spaceimages/')
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(2)

In [8]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [9]:
featured_image_url = f"https://www.jpl.nasa.gov/{soup.find('a', class_='ready').img['src']}"
print(f"featured_image_url = '{featured_image_url}'")

featured_image_url = 'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA03519_ip.jpg'


### Mars Weather

In [10]:
#Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page
#Save the tweet text for the weather report as a variable called mars_weather

#go to web page in same browser
browser.visit('https://twitter.com/marswxreport')

#get html code via beautifulsoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [11]:
#get the latest tweet text
mars_weather = soup.find('p', class_='tweet-text').text
print(f"mars_weather = {mars_weather}")

mars_weather = InSight sol 138 (2019-04-17) low -97.7ºC (-143.9ºF) high -17.3ºC (0.9ºF)
winds from the W at 4.3 m/s (9.5 mph) gusting to 12.6 m/s (28.1 mph)
pressure at 7.30 hPapic.twitter.com/ofMyPZM2vS


### Mars Facts

In [12]:
#visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet
tables = pd.read_html('https://space-facts.com/mars/')

In [13]:
#save as dataframe and create columns
df = tables[0]
df.columns = ['Mars Fact','Value']
df

Unnamed: 0,Mars Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
#convert to html string
html_table = df.to_html()
#clean up table by stripping unwanted new lines
html_table = html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars Fact</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronome

### Mars Hemispheres

In [15]:
#go to web page in same browser
browser.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

#get html code via beautifulsoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [16]:
#get names of all the links
#retrieve the parent divs for all links
links = soup.find_all('div', class_='item')

hemisphere_image_urls = []

#for loop to click on each planet link on home page and grab image url
#must have browser open to astrogeology science center page
for link in links:
    #get names of links (which are our titles)
    title = link.find('div', class_='description').a.text
    
    #click on link for each planet
    browser.click_link_by_partial_text(f"{title}")
    #get html code via beautifulsoup
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    #save image url (jpg)
    image_url = soup.find('div', class_='downloads').li.a['href']
    
    #back to home page
    browser.back()
    
    #save values to dictionary
    post = {'title': title, 'image_url': image_url}
    #append to list
    hemisphere_image_urls.append(post)
    
browser.quit()

In [17]:
#output list
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]