In [1]:
# Import Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
import re

In [2]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Executable path to browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# SECTION 1 - NASA MARS NEWS

In [5]:
# URL of mars nasa page to be scraped
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [6]:
# HTML Object, Parse with Beautiful Soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [7]:
# Extracting News Title and News Paragraph
news_title = soup.find('div', class_='content_title').text
news_paragraph = soup.find('div', class_='article_teaser_body').text
print(news_title)
print(news_paragraph)

Mars Now
The NASA rover touched down eight years ago, on Aug. 5, 2012, and will soon be joined by a second rover, Perseverance.


In [8]:
# SECTION 2 - JPL Mars Space Images - Featured Image

In [9]:
# URL of mars image to be scraped
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(featured_image_url)

# Find and click the full image button
full_image = browser.find_by_id('full_image')
full_image.click()

# Find the more info button and click that
browser.is_element_present_by_text('more info', wait_time=1)
more_info = browser.find_link_by_partial_text('more info')
more_info.click()



In [10]:
# HTML Object, Parse with Beautiful Soup
html_image = browser.html
soup = BeautifulSoup(html_image, 'html.parser')

In [11]:
# find the relative image url
img_url_rel = soup.select_one('figure.lede a img').get("src")
img_url_rel
# Use the base url to create an absolute url
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16567_hires.jpg'

In [12]:
# SECTION 3 - MARS WEATHER

In [13]:
# URL of mars twitter to be scraped
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)
time.sleep(5)

In [14]:
# HTML Object, Parse with Beautiful Soup
html_weather = browser.html
soup = BeautifulSoup(html_weather, 'html.parser')

In [15]:
# First, find a tweet with the data-name `Mars Weather`
mars_weather = soup.find('div', attrs={"class": "tweet", "data-name": "Mars Weather"})

In [16]:
mars_weather

In [17]:
# Next, search within the tweet for the p tag or span tag containing the tweet text
# As Twitter is frequently making changes the try/except will identify the tweet
# text usings a regular expression pattern that includes the string 'sol' if there
# is no p tag with a class of 'tweet-text'
try:
    mars_weather_tweet = mars_weather.find("p", "tweet-text").get_text()
    mars_weather_tweet
except AttributeError:
    pattern = re.compile(r'sol')
    mars_weather_tweet = soup.find('span', text=pattern).text
    mars_weather_tweet
mars_weather_tweet

'InSight sol 601 (2020-08-05) low -91.6ºC (-132.9ºF) high -10.6ºC (12.9ºF)\nwinds from the W at 6.0 m/s (13.4 mph) gusting to 16.0 m/s (35.7 mph)\npressure at 7.80 hPa'

In [18]:
# SECTION 4 - MARS FACTS

In [19]:
# URL of mars facts to be turned into pandas table
facts_url = 'https://space-facts.com/mars/'
mars_facts = pd.read_html(facts_url)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [20]:
# Creating a table with mars information
mars_facts_df = mars_facts[0]
mars_facts_df.rename({0: 'Description', 
                     1: 'Value'}, axis=1, inplace=True)
mars_facts_df

# Convert to html
mars_facts_df.to_html('marsdata.html')
mars_facts_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [21]:
# Find table and assign it to dataframe
mars_df = mars_facts[0]

# Set columns to description and value
mars_df.columns = ['Description', 'Value']

# Set index to be description
mars_df.set_index('Description', inplace=True)

# Show dataframe
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [22]:
# SECTION 5 - MARS HEMISPHERES

In [23]:
def scrape_hemisphere(html_text):
    # parse html text
    hemi_soup = BeautifulSoup(html_text, "html.parser")
    # adding try/except for error handling
    try:
        title_elem = hemi_soup.find("h2", class_="title").get_text()
        sample_elem = hemi_soup.find("a", text="Sample").get("href")
    except AttributeError:
        # Image error will return None, for better front-end handling
        title_elem = None
        sample_elem = None
    hemispheres = {
        "title": title_elem,
        "img_url": sample_elem
    }
    return hemispheres

In [24]:
# A way to break up long strings
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

# Click the link, find the sample anchor, return the href
hemisphere_image_urls = []

for i in range(4):
    # Find the elements on each loop to avoid a stale element exception
    browser.find_by_css("a.product-item h3")[i].click()
    hemi_data = scrape_hemisphere(browser.html)
    # Append hemisphere object to list
    hemisphere_image_urls.append(hemi_data)
    # Finally, we navigate backwards
    browser.back()

print (hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
