In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup

In [2]:
# Set the executable path and initialize the chrome browser in splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path)

## Visit the NASA mars news site

In [3]:
# Visit the mars nasa news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
# Convert the browser html to a soup object and then quit the browser
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')

articles = news_soup.find_all("div", class_='content_title')

# Check the length of the article titles scraped
print(len(articles))

# Print the article titles 
for a in articles:
    print(a.text.strip())


52
NASA's Mars 2020 Rover Completes Its First Drive
NASA's Treasure Map for Water Ice on Mars
Two Rovers to Roll on Mars Again: Curiosity and Mars 2020
NASA's Briefcase-Size MarCO Satellite Picks Up Honors
Global Storms on Mars Launch Dust Towers Into the Sky
NASA Updates Mars 2020 Mission Environmental Review
Two of a Space Kind: Apollo 12 and Mars 2020
Mars Scientists Investigate Ancient Life in Australia
NASA's Mars 2020 Will Hunt for Microscopic Fossils
With Mars Methane Mystery Unsolved, Curiosity Serves Scientists a New One: Oxygen
NASA's Mars 2020 Heads Into the Test Chamber
Screening Soon: 'The Pathfinders' Trains Lens on Mars
InSight's 'Mole' Team Peers into the Pit
Common Questions about InSight's 'Mole'
Mars 2020 Stands on Its Own Six Wheels
New Selfie Shows Curiosity, the Mars Chemist
Naming a NASA Mars Rover Can Change Your Life
Mars 2020 Unwrapped and Ready for More Testing
HiRISE Views NASA's InSight and Curiosity on Mars
NASA's Curiosity Rover Finds an Ancient Oasis on 

In [5]:
slide_elem = news_soup.select_one('ul.item_list li.slide')

# Optional delay for loading the page
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [6]:
# Use the parent element to find the first a tag and save it as `news_title`
news_title = slide_elem.find("div", class_='content_title').get_text()
news_title

"NASA's Mars 2020 Rover Completes Its First Drive"

In [7]:
# Use the parent element to find the paragraph text
news_p = slide_elem.find('div', class_="article_teaser_body").get_text()
news_p

'In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.'

## JPL Space Images Featured Image

In [8]:
# Visit URL
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [9]:
# Find and click the full image button
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [10]:
# Find the more info button and click that
browser.is_element_present_by_text('more info', wait_time=1)
more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()

In [11]:
# Parse the resulting html with soup
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [12]:
# find the relative image url
img_url_rel = img_soup.select_one('figure.lede a img').get("src")
img_url_rel

'/spaceimages/images/largesize/PIA16711_hires.jpg'

In [13]:
# Use the base url to create an absolute url
img_url = f'https://www.jpl.nasa.gov{img_url_rel}'
img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16711_hires.jpg'

## Mars Weather

In [14]:
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [15]:
html = browser.html
weather_soup = BeautifulSoup(html, 'html.parser')

In [16]:
# First, find a tweet with the data-name `Mars Weather`
mars_weather_tweet = weather_soup.find('div', attrs={"class": "tweet", "data-name": "Mars Weather"})

In [17]:
# Next, search within the tweet for the p tag containing the tweet text
mars_weather = mars_weather_tweet.find('p', 'tweet-text').get_text()
mars_weather

AttributeError: 'NoneType' object has no attribute 'find'

## Mars Hemispheres

In [None]:
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [None]:
hemisphere_image_urls = []

# First, get a list of all of the hemispheres
links = browser.find_by_css("a.product-item h3")

# Next, loop through those links, click the link, find the sample anchor, return the href
for i in range(len(links)):
    hemisphere = {}
    
    # We have to find the elements on each loop to avoid a stale element exception
    browser.find_by_css("a.product-item h3")[i].click()
    
    # Next, we find the Sample image anchor tag and extract the href
    sample_elem = browser.find_link_by_text('Sample').first
    hemisphere['img_url'] = sample_elem['href']
    
    # Get Hemisphere title
    hemisphere['title'] = browser.find_by_css("h2.title").text
    
    # Append hemisphere object to list
    hemisphere_image_urls.append(hemisphere)
    
    # Finally, we navigate backwards
    browser.back()
    

In [None]:
hemisphere_image_urls

## Mars Facts

In [None]:
import pandas as pd
df = pd.read_html('https://space-facts.com/mars/')[0]
df.columns=['description', 'value']
df.set_index('description', inplace=True)
df

In [None]:
df.to_html()

In [None]:
browser.quit()