# Mission to Mars

## Part One: Scraping

### Featuring Beautiful Soup, Pandas, and Requests/Splinter

In [1]:
# Import dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests
import time

In [2]:
# Start the Splinter browser: a visible (non-headless) Chrome window driven by
# the chromedriver binary named below.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

### Scraping NASA Mars News

Using Splinter. 

In [3]:
# Open the NASA Mars News site in the Splinter-controlled browser. Later cells
# parse this page for the news title and its teaser paragraph.
NASAurl = 'https://mars.nasa.gov/news/'
browser.visit(NASAurl)

In [4]:
# Parse the page source currently held by the browser into a BeautifulSoup tree.
html = browser.html
NASAsoup = bs(markup=html, features='html.parser')

In [5]:
# Confirm success.
#print(NASAsoup.prettify())

In [6]:
# Extract the text of the first headline <div> and the first teaser <div>.
# find() returns the first match in document order.
NASAtitle = NASAsoup.find('div', attrs={'class': 'content_title'}).get_text()
NASAparagraph = NASAsoup.find('div', attrs={'class': 'article_teaser_body'}).get_text()

In [7]:
# Sanity check: show the scraped title and teaser, one per line.
print(NASAtitle, NASAparagraph, sep='\n')

NASA Invites Students to Name Mars 2020 Rover
Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.


### Scrape JPL Mars Space Images -- Featured Image

Use Splinter.

In [11]:
# Open the JPL space-images search page, filtered to the Mars category, in the
# shared Splinter browser.
JPLurl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(JPLurl)

In [12]:
# Click the link whose text contains 'FULL IMAGE' to open the featured image.
browser.click_link_by_partial_text('FULL IMAGE')
# Pause for one second -- presumably to let the page finish updating before
# the next click.
time.sleep(1)

In [13]:
# Follow the 'more info' link; a later cell reads the image link from the
# <figure class="lede"> element on the page this opens.
browser.click_link_by_partial_text('more info')
# Pause for one second -- presumably to let the new page load before its HTML
# is read in the next cell.
time.sleep(1)

In [14]:
# Parse the page source currently held by the browser into a BeautifulSoup tree.
html = browser.html
JPLsoup = bs(markup=html, features='html.parser')

In [15]:
# Confirm that your soup was delivered.
# JPLsoup.prettify()

In [16]:
# Build the absolute URL of the featured image.
# The anchor inside <figure class="lede"> carries a site-relative href.
lede_anchor = JPLsoup.select_one('figure.lede a')
relative_path = lede_anchor.get('href')

# Prefix the site root to turn the relative path into a full URL.
full_path = 'https://www.jpl.nasa.gov{}'.format(relative_path)

print(full_path)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA20316_hires.jpg


### Scrape Mars Weather Twitter account. 

Use Splinter.

In [17]:
# Open the Mars Weather Report Twitter timeline in the shared Splinter browser.
MWRurl = 'https://twitter.com/marswxreport?lang=en'
browser.visit(MWRurl)

In [18]:
# Parse the page source currently held by the browser into a BeautifulSoup tree.
html = browser.html
MWRsoup = bs(markup=html, features='html.parser')

In [19]:
# Take the text of the first tweet paragraph on the timeline.
# The class string is matched exactly as written.
tweet_class = "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"
mars_weather = MWRsoup.find('p', attrs={'class': tweet_class}).get_text()

In [20]:
# Display the scraped tweet text to confirm the lookup found something.
mars_weather

'We won’t be hearing from @MarsCuriosity or @NASAInSight for the next 2 weeks during Mars solar conjunction. Read more about why Mars missions go silent every 2 years: https://www.wral.com/mars-spacecraft-go-quiet-during-solar-conjunction/18595551/\xa0…pic.twitter.com/fWruE2v151'

### Scrape Space Facts Mars page
Use Pandas

In [21]:
# URL of the Space Facts Mars page. Nothing is opened in the browser here;
# pandas reads this URL directly in the next cell.
SFMurl = 'https://space-facts.com/mars/'

In [22]:
# Let pandas fetch the page and parse every HTML <table> on it.
# The result is a list with one DataFrame per table found.
Marstable = pd.read_html(io=SFMurl)

In [23]:
# Print every table pandas found so the right one can be picked by position.
print(Marstable)

[  Mars - Earth Comparison             Mars            Earth
0               Diameter:         6,779 km        12,742 km
1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
2                  Moons:                2                1
3      Distance from Sun:   227,943,824 km   149,598,262 km
4         Length of Year:   687 Earth days      365.24 days
5            Temperature:    -153 to 20 °C      -88 to 58°C,                       0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers]


In [24]:
# Select the second scraped table (index 1), the Mars facts sheet.
# Index 0 is the Mars-Earth comparison table. This binds MarsDF to the same
# object stored in the list; no copy is made.
MarsDF = Marstable[1]

In [25]:
# Display the selected table; its columns still carry the default 0/1 labels.
MarsDF

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [26]:
# Replace the default 0/1 column labels with descriptive names.
# Assigning to .columns changes MarsDF itself rather than creating a new frame.
MarsDF.columns = ['Description', 'Value']

In [27]:
# Display the table again to check the renamed columns.
MarsDF

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [28]:
# Preview the table indexed by the Description column.
# NOTE: set_index() returns a new DataFrame and the result is not assigned here,
# so this cell only displays the indexed view; MarsDF keeps its integer index.
MarsDF.set_index('Description')

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [29]:
# Export the facts table as an HTML file, indexed by the Description column.
# set_index() returns a new DataFrame rather than modifying MarsDF, so the index
# is applied here as part of the export. Without it the file would carry the
# default integer index as an extra leading column.
MarsDF.set_index('Description').to_html('MarsFacts_table.html')

### Scrape USGS Astrogeology site for images of Mars' hemispheres

In [4]:
# Open the USGS Astrogeology search results for enhanced Mars hemisphere
# images in the shared Splinter browser.
USGSurl = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(USGSurl)

In [5]:
# Parse the USGS results page and gather every <h3> heading on it.
# Each heading names one hemisphere result.
html = browser.html
USGSsoup = bs(markup=html, features='html.parser')
enhanced_images = USGSsoup.find_all(name='h3')

In [6]:
# Print the <h3> tags found to confirm all hemisphere headings were found.
print(enhanced_images)

[<h3>Cerberus Hemisphere Enhanced</h3>, <h3>Schiaparelli Hemisphere Enhanced</h3>, <h3>Syrtis Major Hemisphere Enhanced</h3>, <h3>Valles Marineris Hemisphere Enhanced</h3>]


In [8]:
# Collect the title and full-size image URL for each Mars hemisphere.
# One {"title": ..., "img_url": ...} record is appended to hemph_img_urls per
# <h3> heading on the USGS results page.
hemph_img_urls = []
hemph_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Reuse the Splinter browser opened at the top of the notebook. Rebinding
# `browser` to a new Browser here left the earlier Chrome session open with no
# reference to it, and also dropped the chromedriver path configured for it.
browser.visit(hemph_url)
time.sleep(3)  # give the results page time to render before reading its HTML

home = browser.html
hemph_soup = bs(home, "html.parser")
results = hemph_soup.find_all("h3")

# Headings read "<Name> Hemisphere Enhanced"; the stored title drops the suffix.
title_suffix = " Enhanced"

for result in results:
    title = result.text
    # Strip the suffix only when it is actually present, so a heading with a
    # different ending is not truncated by a blind fixed-length slice.
    if title.endswith(title_suffix):
        title = title[:-len(title_suffix)]
    print(title)

    # Open the hemisphere's detail page through the link matching its title.
    browser.click_link_by_partial_text(title)
    time.sleep(3)

    # Take the href of the first link whose URL contains "download".
    img_url = browser.find_link_by_partial_href("download")["href"]
    print(img_url)

    hemph_dictionary = {"title": title, "img_url": img_url}
    hemph_img_urls.append(hemph_dictionary)

    # Go back to the results page so the next heading's link can be clicked.
    time.sleep(3)
    browser.visit(hemph_url)

Cerberus Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [14]:
# Collect the heading text and full-size image URL for each Mars hemisphere.
# Despite its name, hemisphere_dictionary is a list: it receives one
# {"Image": ..., "URL": ...} record per <h3> heading on the USGS results page.
hemisphere_dictionary = []

USGS_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Reuse the Splinter browser opened at the top of the notebook. Rebinding
# `browser` to a new Browser here left the earlier Chrome session open with no
# reference to it, and also dropped the chromedriver path configured for it.
browser.visit(USGS_url)
time.sleep(3)  # give the results page time to render before reading its HTML

home = browser.html
USGSsoup = bs(home, "html.parser")
headings = USGSsoup.find_all("h3")

for heading in headings:
    # The full heading text is both the link text to click and the stored name.
    title = heading.text
    print(title)

    # Open the hemisphere's detail page through the link matching its heading.
    browser.click_link_by_partial_text(title)
    time.sleep(3)

    # Take the href of the first link whose URL contains "download".
    img_url = browser.find_link_by_partial_href("download")["href"]
    print(img_url)

    hemisphere_data = {"Image": title, "URL": img_url}
    hemisphere_dictionary.append(hemisphere_data)

    # Go back to the results page so the next heading's link can be clicked.
    time.sleep(3)
    browser.visit(USGS_url)

print(hemisphere_dictionary)

Cerberus Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
[{'Image': 'Cerberus Hemisphere Enhanced', 'URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'Image': 'Schiaparelli Hemisphere Enhanced', 'URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'Image': 'Syrtis Major Hemisphere Enhanced', 'URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'Image': 'Valles Marineris Hemi

In [30]:
# Print the collected hemisphere records (a list of dicts) on one line.
print(hemisphere_dictionary)

[{'Image': 'Cerberus Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA16227_hires.jpg'}, {'Image': 'Schiaparelli Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA16227_hires.jpg'}, {'Image': 'Syrtis Major Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA16227_hires.jpg'}, {'Image': 'Valles Marineris Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA16227_hires.jpg'}]


In [32]:
import pprint

In [33]:
# Pretty-print the hemisphere records so each entry is easier to read.
pprint.pprint(hemisphere_dictionary)

[{'Image': 'Cerberus Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA14400_hires.jpg'},
 {'Image': 'Schiaparelli Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA14400_hires.jpg'},
 {'Image': 'Syrtis Major Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA14400_hires.jpg'},
 {'Image': 'Valles Marineris Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA14400_hires.jpg'}]


## Prepare for Step Two: Mongo DB and Flask application

### Next step: convert this notebook to a Python script that returns a single dictionary with all results. The dictionary is tested here.

In [35]:
# Gather every scraped result into one dictionary for testing.
# NOTE(review): "Table" holds the pandas DataFrame itself, not a plain Python
# type -- confirm whether it must be converted (e.g. to HTML) before it is
# stored in MongoDB.
mars_entries = [
    ("Headline", NASAtitle),
    ("Summary", NASAparagraph),
    ("ImageURL", full_path),
    ("Weather", mars_weather),
    ("Table", MarsDF),
    ("Hemisphere Dictionaries", hemisphere_dictionary),
]
Mars_dict = dict(mars_entries)



In [36]:
# Pretty-print the combined results dictionary to check its contents.
pprint.pprint(Mars_dict)

{'Headline': "What's Mars Solar Conjunction, and Why Does It Matter?",
 'Hemisphere Dictionaries': [{'Image': 'Cerberus Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA17661_hires.jpg'},
                             {'Image': 'Schiaparelli Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA17661_hires.jpg'},
                             {'Image': 'Syrtis Major Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA17661_hires.jpg'},
                             {'Image': 'Valles Marineris Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA17661_hires.jpg'}],
 'ImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17661_hires.jpg',
 'Summary': 'NASA spacecraft at Mars are going to be on their o