# Scraping Mars Web Data

In [5]:
# Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import time

### NASA Mars News Site
* latest News Title
* paragraph text
* source: https://mars.nasa.gov/news/

In [6]:
# Set up path and url
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
news_url = 'https://mars.nasa.gov/news/'

In [7]:
# Visit url and retrieve first article title and description
browser.visit(news_url)
news_html = browser.html
time.sleep(1)
soup = BeautifulSoup(news_html, 'html.parser')

# Pull first news headline and description
news_title = soup.find('div', class_='bottom_gradient').find('h3').text
news_p = soup.find('div', class_='article_teaser_body').text

In [8]:
print(news_title)
print(news_p)

Hear Audio From NASA's Perseverance As It Travels Through Deep Space
The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.


### JPL Mars Space Images
* URL for featured Mars image (full size jpg image)
* Source: https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars

In [9]:
# Set up url
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Navigate to full website featured image and save link to image
browser.visit(jpl_url)
time.sleep(1)

# Navigate to full image
browser.links.find_by_partial_text('FULL IMAGE').click()
time.sleep(1)

# Navigate to page that has full-sized image
browser.links.find_by_partial_text('more info').click()
time.sleep(1)

# Pull html and store image url
jpl_html = browser.html
soup = BeautifulSoup(jpl_html, 'html.parser')
image_url = soup.find('img', class_='main_image')['src']
featured_image_url = 'https://www.jpl.nasa.gov' + image_url

In [10]:
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14106_hires.jpg


### Mars Facts
* table containing facts about the planet
* Source: https://space-facts.com/mars/

In [11]:
# Scrape tables from webpage using Pandas
facts_url = 'https://space-facts.com/mars/'
tables = pd.read_html(facts_url)

In [12]:
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [13]:
mars_facts = tables[0].rename(columns={0: '', 1: ' '}).set_index('')

In [14]:
mars_facts

In [15]:
# Use Pandas to convert to HTML table string
mars_facts_html = mars_facts.to_html(index=False)

In [16]:
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres
* High resolution images of each of Mar's hemispheres
* Source: https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars

In [18]:
# Set up url
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# List of hemispheres
hemispheres = ['Valles Marineris Hemisphere', 'Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere']

# Empty list to store image urls
hemisphere_image_urls = []

# Open up webpage
browser.visit(hemi_url)

# For each hemisphere, pull image url and store with associated hemisphere name
for hemi in hemispheres: 
    hemi_dict = {}
    try:
        # Click on hemisphere link
        browser.links.find_by_partial_text(hemi).click()
        time.sleep(1)
        # Click on 'Open' to get to full image stored on page
        browser.links.find_by_partial_text('Open').click()
        
        # Pull html
        hemi_html = browser.html
        soup = BeautifulSoup(hemi_html, 'html.parser')
        
        # Pull full-sized image url
        image_url = soup.find('img', class_='wide-image')['src']
        
        # Go back to main page
        browser.back()
        
        # Store image link in dictionary and append to list
        full_image_url = 'https://astrogeology.usgs.gov' + image_url
        hemi_dict['title'] = hemi
        hemi_dict['img_url'] = full_image_url
        hemisphere_image_urls.append(hemi_dict)
    except:
        print('Webpage not found')

browser.quit()

In [19]:
print(hemisphere_image_urls)

[{'title': 'Valles Marineris Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}, {'title': 'Cerberus Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}]
