In [None]:
#**************************************************************#
# This Mission to Mars Jupyter Notebook was developed as part  #
# of the web scraping challenge for Data Analytics Bootcamp    #
# Created By: Paul Hardy                                       #
# Created On: 09-13-2020                                       #
#**************************************************************#

In [37]:
import time

import requests
from bs4 import BeautifulSoup

In [47]:
# ******** NASA Mars News scrape *************
#*********************************************
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
# Retrieve the page with the requests module; the timeout keeps a stalled
# connection from hanging the notebook indefinitely.
response = requests.get(url, timeout=30)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
response.raise_for_status()
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')
# The first "content_title" div is used as the headline and the first
# "rollover_description_inner" div as its teaser text.
title_div = soup.find('div', class_="content_title")
teaser_div = soup.find('div', class_="rollover_description_inner")
if title_div is None or teaser_div is None:
    # Raise a descriptive error rather than an opaque AttributeError on None.
    raise ValueError('Expected news title/teaser elements were not found at ' + url)
news_title = title_div.text.strip()
print(news_title)
news_teaser_title = teaser_div.text.strip()
print(news_teaser_title)

NASA Readies Perseverance Mars Rover's Earthly Twin
Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


In [85]:
# ******** JPL Mars Space Images - Featured Image *************
#**************************************************************
# Splinter is used here to navigate the website, by finding 
# and clicking on buttons.
from splinter import Browser


In [86]:
# Address of the JPL space-images page, filtered to the Mars category.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Start a visible (headless=False) Chrome session through the 'chromedriver.exe' driver.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', headless=False, **executable_path)
# Point the browser at the JPL page.
browser.visit(url)

In [87]:
# Snapshot the markup of the initial page as currently held by the browser
html = browser.html
# Build a Beautiful Soup tree from that snapshot using 'html.parser'
soup = BeautifulSoup(markup=html, features='html.parser')

In [88]:
# Find the link whose text contains "FULL IMAGE", then click it to navigate to the next page.
full_image_links = browser.links.find_by_partial_text('FULL IMAGE')
full_image_links.click()

In [89]:
# Snapshot the markup of the "full image" page as currently held by the browser
html = browser.html
# Build a Beautiful Soup tree from that snapshot using 'html.parser'
soup = BeautifulSoup(markup=html, features='html.parser')

In [90]:
# Find the link whose text contains "more info", then click it to reach the page holding the full jpg image.
more_info_links = browser.links.find_by_partial_text('more info')
more_info_links.click()

In [91]:
# Capture the HTML of the "more info" page, which contains a link to the full jpg image
html = browser.html
# This is the last page read from this browser session, so close it here rather
# than leaving the Chrome/chromedriver process running (the hemispheres section
# launches its own Browser).
browser.quit()
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# The section of the HTML that contains the href to the full jpg image
results = soup.find('figure', class_="lede")
if results is None or results.a is None:
    # Raise a descriptive error rather than an opaque AttributeError/TypeError on None.
    raise ValueError('No <figure class="lede"> image link found on the "more info" page')
# Grab href element of full jpeg image
link = results.a['href']

In [92]:
# The scraped "link" is appended to the JPL site root to form the complete image URL.
jpl_base_url = "https://www.jpl.nasa.gov"
featured_image_url = jpl_base_url + link
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA00069_hires.jpg'

In [120]:
# ******** Mars Facts - Table scrape of Mars Facts table using Pandas *************
#**********************************************************************************
# pandas is used to read the HTML tables on the page into DataFrames.
import pandas as pd
# URL of the Mars facts page whose tables are scraped.
url = 'https://space-facts.com/mars/'

In [121]:
# Read every HTML table found at the URL; pandas returns them as a list of DataFrames.
tables_list = pd.read_html(io=url)
tables_list

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [122]:
# Work on a copy of the first scraped table so this cell can be re-run safely:
# renaming and indexing the original in place would alter tables_list[0] itself,
# and a second run would then fail on the column assignment.
mars_profile_df = tables_list[0].copy()
mars_profile_df.columns = ['Attribute', 'Value']
# set_index returns a new frame; 'Attribute' becomes the row index.
mars_profile_df = mars_profile_df.set_index('Attribute')
mars_profile_df

Unnamed: 0_level_0,Value
Attribute,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [125]:
# Render the profile frame as an HTML table and remove the newline characters from the markup.
html_table = mars_profile_df.to_html().replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Attribute</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [278]:
# ******** Mars Hemispheres Scrape ********************
#******************************************************

In [279]:
# Start a visible (headless=False) Chrome session through the 'chromedriver.exe' driver.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
# Visit the USGS web site.
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
# Short pause after the visit, presumably so the results page can finish loading
# before its HTML is read. Relies on `time` being imported in the notebook's
# import cell; without that import this line raises NameError on a fresh kernel.
time.sleep(1)

In [280]:
# Snapshot the markup of the initial page as currently held by the browser
html = browser.html
# Build a Beautiful Soup tree from that snapshot using 'html.parser'
soup = BeautifulSoup(markup=html, features='html.parser')

In [281]:
# List that will receive the full-resolution image links
hemisphere_url = []
# The hemisphere names are the text of the <h3> elements in the parsed page
h3s = soup.find_all('h3')
hemispheres = [heading.text for heading in h3s]
hemispheres

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [282]:
# Loop through each of the hemispheres, open its separate page and collect the
# link labelled "Original" (the full resolution image) - exactly one per hemisphere.
# The list is rebuilt from scratch so re-running this cell cannot append duplicates.
hemisphere_url = []
try:
    for hemi in hemispheres:
        # Click the link whose text contains the hemisphere name to open its page.
        browser.links.find_by_partial_text(hemi).click()
        html = browser.html
        # Parse HTML with Beautiful Soup
        soup = BeautifulSoup(html, 'html.parser')
        # Search the first ul tag for the li tags that contain the links to the full images
        results = soup.find('ul')
        full_images = results.find_all('li') if results is not None else []
        image_url = None
        for image_link in full_images:
            href_full = image_link.find('a')
            # Skip list items without an <a>; keep the one whose link text is "Original".
            if href_full is not None and href_full.get_text(strip=True) == "Original":
                image_url = href_full['href']
                break
        if image_url is None:
            # Fail loudly: a missing entry would misalign the names and URLs,
            # which are later paired up by position.
            raise ValueError('No "Original" image link found for {!r}'.format(hemi))
        hemisphere_url.append(image_url)
        # Go back to the search results and pause briefly before the next click.
        browser.back()
        time.sleep(1)
finally:
    # Always close the Chrome/chromedriver session, even if a page fails to parse.
    browser.quit()

In [283]:
# Display the collected image links.
hemisphere_url

['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif']

In [284]:
# Pair every hemisphere name with the image link stored at the same position,
# producing one {'title', 'img_url'} dictionary per hemisphere.
hemisphere_image_urls = [
    {'title': title, 'img_url': hemisphere_url[idx]}
    for idx, title in enumerate(hemispheres)
]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]