# Mission to Mars

## Part One: Scraping

### Featuring Beautiful Soup, Pandas, and Requests/Splinter

In [255]:
# Import dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests
import time

In [256]:
# Initialize Splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

### Scraping NASA Mars News

Using Splinter. 

In [257]:
# Scrape the NASA Mars News Site to collect the latest News Title and Paragraph Text.
# Go forth. 
NASAurl = 'https://mars.nasa.gov/news/'
browser.visit(NASAurl)

In [258]:
# Make some Mars soup
html = browser.html
NASAsoup = bs(html, 'html.parser')

In [259]:
# Confirm success.
#print(NASAsoup.prettify())

In [260]:
# If you gaze long enough into the soup, the soup also gazes back into you. 
# Assign the results text for News Title and Paragraph to variables. 
NASAtitle1 = NASAsoup.find('div', class_ = 'content_title').text
NASAparagraph1 = NASAsoup.find('div', class_ = 'article_teaser_body').text

In [261]:
# Testing
print(NASAtitle1)
print(NASAparagraph1)
# Mars soup is delicious. 

What's Mars Solar Conjunction, and Why Does It Matter?
NASA spacecraft at Mars are going to be on their own for a few weeks when the Sun comes between Mars and Earth, interrupting communications.


### Scrape JPL Mars Space Images -- Featured Image

Use Splinter.

In [262]:
# Set the URL and visit the page. 
JPLurl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(JPLurl)

In [263]:
# Navigate to the full image using a button.
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(1)

In [264]:
# And again
browser.click_link_by_partial_text('more info')
time.sleep(1)

In [265]:
# Make some Jet Propulsion soup
html = browser.html
JPLsoup = bs(html, 'html.parser')

In [266]:
# Confirm that your soup was delivered.
# JPLsoup.prettify()

In [267]:
# Get the URL
# Get the relative first
# Get the lede figure
# # Get the src (relative path)
relative_path = JPLsoup.select_one('figure.lede a').get('href')

# Add it to the end of the full path
full_path = f'https://www.jpl.nasa.gov{relative_path}'

print(full_path)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19821_hires.jpg


### Scrape Mars Weather Twitter account. 

Use Splinter.

In [268]:
# Set URL path and visit
MWRurl = 'https://twitter.com/marswxreport?lang=en'
browser.visit(MWRurl)

In [269]:
# Make Twitter soup
html=browser.html
MWRsoup= bs(html, 'html.parser')

In [270]:
# Get the weather info by scraping
mars_weather = MWRsoup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

In [271]:
# Confirm success.
mars_weather

'InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)\nwinds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)\npressure at 7.70 hPapic.twitter.com/MhPPOHJg3m'

### Scrape Space Facts Mars page
Use Pandas

In [272]:
# Set path and visit
SFMurl = 'https://space-facts.com/mars/'

In [273]:
# Set tables
Marstable= pd.read_html(SFMurl)

In [274]:
print(Marstable)

[  Mars - Earth Comparison             Mars            Earth
0               Diameter:         6,779 km        12,742 km
1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
2                  Moons:                2                1
3      Distance from Sun:   227,943,824 km   149,598,262 km
4         Length of Year:   687 Earth days      365.24 days
5            Temperature:    -153 to 20 °C      -88 to 58°C,                       0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers]


In [275]:
# Make a DF. Need second table (index 1)
MarsDF = Marstable[1]

In [276]:
MarsDF

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [277]:
# Give the table descriptive column names
MarsDF.columns = ['Description', 'Value']

In [278]:
MarsDF

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [279]:
# Set the index to Description column
MarsDF.set_index('Description')

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [280]:
# Convert to html and export as an html file
MarsDF.to_html('MarsFacts_table.html')

### Scraper USGS Astrogeology site for images of Mars' hemispheres

In [281]:
# Set URL and visit it
USGSurl = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(USGSurl)

In [282]:
# Find images
html = browser.html
USGSsoup = bs(html, 'html.parser')
enhanced_images = USGSsoup.find_all('img', class_="thumb")

In [283]:
# Create an empty list for your lovely dictionary
hemisphere_dictionary = []

#Loop through all of the thumbnails
for image in range(len(enhanced_images)):
    # Use a try/else; will stop errors from ruining everything
    try:
        # Use find by css to use CSS tags to identify things. All clickable links (with image titles)
        # are on the h3 level. 
        # Get the text of the link; it's the title.
        image_title = browser.find_by_css('a.product-item h3')[image].text
        # Use CSS to identify the active links (same as above) and click on them. 
        browser.find_by_css('a.product-item h3')[image].click()
        # Pause for a breather. 
        time.sleep(3)
        # Now, click by text. Use Sample to see full image in browser. Other one will download,
        # which is not the desired effect. 
        browser.click_link_by_partial_text('Sample') 
        # Pause for another breather. 
        time.sleep(3)
    #     Start scraping. Yes, again. 
        html = browser.html
        USGSsoup = bs(html, 'html.parser')
        # Get the URL
        # Get the relative link first
        # There's only one image on the page. Select it, get the source
        relative_img = USGSsoup.select_one('img').get('src')
        # Add the relative path to the main URL to get the full URL
        full_image_path = f'https://astrogeology.usgs.gov{relative_path}'
        # Append these to your dictionary
        hemisphere_dictionary.append({'Image': image_title , 'URL': full_image_path})
        # Go back from whence you came
        browser.back()
    except:
        # Channel Arya if something bad happens.  
        print("What do we say to death? Not today.")
    
print(hemisphere_dictionary)

[{'Image': 'Cerberus Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Schiaparelli Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Syrtis Major Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Valles Marineris Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}]


In [284]:
print(hemisphere_dictionary)

[{'Image': 'Cerberus Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Schiaparelli Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Syrtis Major Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}, {'Image': 'Valles Marineris Hemisphere Enhanced', 'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}]


In [285]:
import pprint

In [286]:
pprint.pprint(hemisphere_dictionary)

[{'Image': 'Cerberus Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
 {'Image': 'Schiaparelli Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
 {'Image': 'Syrtis Major Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
 {'Image': 'Valles Marineris Hemisphere Enhanced',
  'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}]


## Prepare for Step Two: Mongo DB and Flask application

### Need to convert notebook to Python Script and be sure that it will return a single dictionary with all results. Testing dictionary here. 

In [287]:
## Make results dictionary to test. <3  

Mars_dict = {"Headline": NASAtitle1, "Summary": NASAparagraph1, 
             "ImageURL": full_path, "Weather": mars_weather, 
             "Table": MarsDF, "Hemisphere Dictionaries": hemisphere_dictionary}



In [289]:
pprint.pprint(Mars_dict)

{'Headline': "What's Mars Solar Conjunction, and Why Does It Matter?",
 'Hemisphere Dictionaries': [{'Image': 'Cerberus Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
                             {'Image': 'Schiaparelli Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
                             {'Image': 'Syrtis Major Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'},
                             {'Image': 'Valles Marineris Hemisphere Enhanced',
                              'URL': 'https://astrogeology.usgs.gov/spaceimages/images/largesize/PIA19821_hires.jpg'}],
 'ImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19821_hires.jpg',
 'Summary': 'NASA spacecraft at Mars are going to be on their o