In [1]:
# Import dependencies
from splinter import Browser
from bs4 import BeautifulSoup

In [2]:
# Set the executable path and initialize the chrome browser in splinter.
# NOTE(review): the bare name 'chromedriver' presumably relies on the driver binary
# being discoverable on the system PATH — confirm for your environment.
executable_path = {'executable_path': 'chromedriver'}
# `browser` is the single session reused by every scraping cell in this notebook.
browser = Browser('chrome', **executable_path)

## Obtaining NASA Mars News

In [3]:
# Open the NASA Mars news listing page in the automated browser
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Wait up to 1 second for the first news slide to appear before scraping.
# The call returns a bool (displayed as the cell output); nothing else checks it.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [4]:
# Parse the browser's current page source into a BeautifulSoup tree
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Keep the first element matching the news-slide selector; the title and teaser
# are both read from this element
slide = soup.select_one('ul.item_list li.slide')

In [5]:
# Inspect the slide's title container (the div with class `content_title`)
slide.find("div", class_='content_title')

<div class="content_title"><a href="/news/8585/nasas-mars-2020-rover-closer-to-getting-its-name/" target="_self">NASA's Mars 2020 Rover Closer to Getting Its Name</a></div>

In [6]:
# Locate the slide's `content_title` div, then keep its text as `news_title`
title_div = slide.find("div", class_='content_title')
news_title = title_div.get_text()
news_title

"NASA's Mars 2020 Rover Closer to Getting Its Name"

In [7]:
# Locate the slide's teaser div, then keep its text as `news_p`
teaser_div = slide.find('div', class_="article_teaser_body")
news_p = teaser_div.get_text()
news_p

"155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July."

## JPL Space Images Featured Image

In [27]:
# Visit the NASA JPL space images page, filtered to the Mars category
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [28]:
# Find and click the full image button (the element with id `full_image`)
featured_image = browser.find_by_id('full_image')
featured_image.click()

In [29]:
# Click "more info" button
# Give the link text up to 1 second to show up before looking it up.
browser.is_element_present_by_text('more info', wait_time=1)

# `find_link_by_partial_text` is deprecated in newer splinter releases in favour of
# the `browser.links` finder. Use the finder when the installed version provides it
# and fall back to the legacy method otherwise, so the cell runs on both.
if hasattr(browser, 'links'):
    more_info = browser.links.find_by_partial_text('more info')
else:
    more_info = browser.find_link_by_partial_text('more info')
more_info.click()

In [30]:
# Parse the page reached after the "more info" click with BeautifulSoup, as before
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [31]:
# Pick the image nested in the lede figure's link and read its `src` attribute
lede_img = img_soup.select_one('figure.lede a img')
img_url = lede_img.get("src")
img_url

'/spaceimages/images/largesize/PIA16715_hires.jpg'

In [32]:
# The scraped `src` is site-relative (it starts with '/'), so prefix the JPL host
# to create an absolute url
featured_img_url = f'https://www.jpl.nasa.gov{img_url}'
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16715_hires.jpg'

## Mars Weather info from Twitter

In [43]:
# Visit the `marswxreport` Twitter account page
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [44]:
# Parse the Twitter page's current HTML into a soup object
html = browser.html
weather_soup = BeautifulSoup(html, 'html.parser')

In [45]:
# First, find a tweet container: a div with class `tweet` whose `data-name` is `Mars Weather`.
# `find` returns only the first match, or None when nothing on the page matches.
mars_weather_tweet = weather_soup.find('div', attrs={"class": "tweet", "data-name": "Mars Weather"})

In [46]:
# Next, locate the tweet's `tweet-text` paragraph and keep its text as `mars_weather`.
# NOTE(review): get_text() also collects text from nested tags; the recorded output
# ends with a 'pic.twitter.com/…' fragment fused onto the last reading.
tweet_text_tag = mars_weather_tweet.find('p', class_='tweet-text')
mars_weather = tweet_text_tag.get_text()
mars_weather

'InSight sol 408 (2020-01-19) low -97.1ºC (-142.7ºF) high -16.0ºC (3.1ºF)\nwinds from the SSW at 5.2 m/s (11.6 mph) gusting to 20.7 m/s (46.3 mph)\npressure at 6.40 hPapic.twitter.com/tIqoEyK2Uk'

## Mars Facts

In [54]:
import pandas as pd

In [58]:
# Load the first table found on the Mars facts page and label its two columns
facts_table = pd.read_html('https://space-facts.com/mars/')[0]
facts_table.columns = ['Description', 'Value']

# Index the rows by description; set_index returns a new frame here instead of
# mutating in place
df = facts_table.set_index('Description')
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [59]:
# Use pandas to render the facts table as an HTML <table> string
# (the string is only displayed here; it is not assigned to a variable)
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

## Mars Hemispheres

In [51]:
# Visit the USGS Astrogeology search results for enhanced Mars hemisphere images
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [52]:
# Collects one {'img_url': ..., 'title': ...} dict per hemisphere
hemisphere_image_urls = []

# Headings of the result thumbnails; each one leads to a hemisphere's own page
# (this should contain 4 elements). Only the count is used by the loop below.
links = browser.find_by_css("a.product-item h3")

# Visit each hemisphere page in turn, record its sample image URL and title,
# then return to the results page.
for i in range(len(links)):
    # Look the headings up again on every pass rather than reusing `links`
    # (presumably because element references obtained before navigating away
    # are no longer usable after click()/back() — confirm).
    browser.find_by_css("a.product-item h3")[i].click()

    # Find the 'Sample' (high resolution) anchor tag. `find_link_by_text` is
    # deprecated in newer splinter releases in favour of the `browser.links`
    # finder; use the finder when available and fall back otherwise.
    if hasattr(browser, 'links'):
        sample_elem = browser.links.find_by_text('Sample').first
    else:
        sample_elem = browser.find_link_by_text('Sample').first

    # Image href and page title for this hemisphere
    hemisphere = {
        'img_url': sample_elem['href'],
        'title': browser.find_by_css("h2.title").text,
    }
    hemisphere_image_urls.append(hemisphere)

    # Finally, get back to the results page for the next hemisphere
    browser.back()

In [53]:
# Display the scraped list: one dict per hemisphere holding its `img_url` and `title`
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [60]:
# Shut down the automated browser session now that scraping is finished
browser.quit()