In [None]:
# Import modules
import splinter
import selenium
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Resolve the Chrome driver through webdriver_manager, then open a
# non-headless splinter browser that the scraper cells below share
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

NASA Mars News

Scrape the Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [None]:
# Scrape MARS web
def mars_news(browser):
    """Scrape the newest headline and teaser text from the Mars news site.

    Args:
        browser: An open splinter-style browser exposing ``visit``,
            ``is_element_present_by_css`` and ``html``. It is NOT closed
            here, so the same instance can be passed to further scrapers.

    Returns:
        tuple: ``(news_title, news_p)`` taken from the first
        ``div.list_text`` element, or ``(None, None)`` when the expected
        elements are not found in the page.
    """
    # Initialize base url
    base_url = 'https://redplanetscience.com/'

    # Visit browser
    browser.visit(base_url)

    # Allow delay for loading page
    browser.is_element_present_by_css('div.list_text', wait_time=1)

    # Parse the rendered page. The result is bound to a name other than the
    # parser class so the constructor is never shadowed by a local variable.
    page = BeautifulSoup(browser.html, 'html.parser')

    try:
        slide_elem = page.select_one('div.list_text')

        # Save news title
        news_title = slide_elem.find("div", class_="content_title").get_text()

        # Save news paragraph
        news_p = slide_elem.find("div", class_="article_teaser_body").get_text()

    except AttributeError:
        # A lookup returned None (element missing); report it and hand back
        # placeholders instead of failing on unbound names at the return.
        print("Uh oh! Something went wrong")
        return None, None

    return news_title, news_p

In [None]:
# Run the Mars news scraper, then show the headline followed by its teaser
news_title, news_p = mars_news(browser)
print(news_title, news_p, sep="\n")

JPL Mars Space Images - Featured Image

Visit the url for the Featured Space Image site here.

Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

Make sure to find the image url to the full size .jpg image.

Make sure to save a complete url string for this image.

In [None]:
# Scrape JPL web
def jpl(browser):
    """Scrape the featured image URL from the JPL site.

    The URL is read from the inline ``style`` attribute of the first
    ``article`` inside ``.main_feature .carousel_items`` and prefixed with
    the site root.

    Args:
        browser: An open splinter-style browser exposing ``visit``,
            ``is_element_present_by_css`` and ``html``. It is NOT closed
            here, so the same instance can be passed to further scrapers.

    Returns:
        str | None: The absolute image URL, or ``None`` when the expected
        markup is not present on the page.
    """
    # Initialize base url
    base_url = "https://www.jpl.nasa.gov"

    # Visit browser
    browser.visit(base_url)

    # Allow delay for loading page; wait on the container parsed below
    browser.is_element_present_by_css('.carousel_items', wait_time=1)

    # Parse the rendered page. The result is bound to a name other than the
    # parser class so the constructor is never shadowed by a local variable.
    page = BeautifulSoup(browser.html, 'html.parser')

    try:
        carousel = page.find(class_="main_feature").find(class_="carousel_items")
        style = carousel.article["style"]

        # Split only on the first "url" so a path that itself contains
        # "url" (e.g. "curly.jpg") is not truncated, then trim the
        # surrounding "('...');" wrapper.
        img_url = base_url + style.split("url", 1)[1].strip(" ;()'\"")

    except (AttributeError, TypeError, KeyError, IndexError):
        # Missing container (AttributeError), missing article (TypeError),
        # missing style attribute (KeyError) or no url(...) token (IndexError).
        print("Uh oh! Something went wrong")
        return None

    return img_url

In [None]:
# Run the JPL scraper with the shared browser and print the value it returns
img_url = jpl(browser)
print(img_url)

Mars Facts

Visit the Mars Facts webpage here and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

Use Pandas to convert the data to an HTML table string.

In [None]:
# Scrape Mars facts
def mars_facts(browser):
    """Build an HTML fragment of Mars facts from the first table on the page.

    Args:
        browser: Accepted for signature parity with the other scrapers; it
            is not used because pandas fetches the page itself.

    Returns:
        str: The table's inner HTML (header and body markup) with newlines
        removed and the surrounding ``<table>`` tags stripped.
    """
    facts_url = 'https://space-facts.com/mars/'

    # First table on the page, with the positional columns given names
    facts = pd.read_html(facts_url)[0].rename(columns={0: "metric", 1: "value"})

    # Render without the index column and flatten to a single line
    markup = facts.to_html(index=False).replace('\n', '')

    # Drop the wrapping table tags so only the inner markup remains
    for wrapper in ("<table border=\"1\" class=\"dataframe\">", "</table>"):
        markup = markup.replace(wrapper, "")

    return markup.strip()

In [None]:
# Run the Mars facts scraper and print the HTML string it returns
# (the browser argument is passed through but mars_facts does not use it)
table_html = mars_facts(browser)
print(table_html)

Mars Hemispheres

Visit the astrogeology site here to obtain high-resolution images for each of Mars's hemispheres.

You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [None]:
# Scrape Mars hemispheres
def mars_hemi(browser):
    """Scrape the title and image URL for each Mars hemisphere.

    Visits the search results page, collects the detail-page link of every
    ``div.item``, then visits each link to read the title and the
    ``img.wide-image`` source.

    Args:
        browser: An open splinter-style browser exposing ``visit``,
            ``is_element_present_by_css`` and ``html``. It is reused for
            every page and is NOT closed here; the caller owns it.

    Returns:
        list[dict]: One ``{"title": ..., "image_url": ...}`` dict per
        hemisphere that could be parsed. Entries whose markup is missing
        are skipped after printing a message. The ``image_url`` key name is
        kept as-is for backward compatibility.
    """
    # Initialize base url
    base_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # Site root used to make the relative hrefs / image paths absolute
    hemi_base_url = "https://astrogeology.usgs.gov"

    # Visit browser
    browser.visit(base_url)

    # Allow delay for loading page; wait on the result items parsed below
    browser.is_element_present_by_css('div.item', wait_time=1)

    # Parse the results page. The result is bound to a name other than the
    # parser class so the constructor is never shadowed by a local variable.
    results_page = BeautifulSoup(browser.html, 'html.parser')

    # Generate urls for each hemisphere
    links = []
    for item in results_page.find_all("div", class_="item"):
        try:
            links.append(hemi_base_url + item.find(class_="description").a["href"])
        except (AttributeError, TypeError, KeyError):
            print("Uh oh! Something went wrong")

    # Initialize list of image urls
    image_urls = []

    # Loop through each link; the browser must stay open across iterations
    for link in links:
        browser.visit(link)
        detail_page = BeautifulSoup(browser.html, 'html.parser')

        try:
            # Extract title and image url
            title = (
                detail_page.find("div", class_="content")
                .find("h2", class_="title")
                .text.replace(" Enhanced", "")
            )
            # Prefix with the site root, not the search URL (which carries
            # a query string and would yield an invalid address)
            image_url = hemi_base_url + detail_page.find("img", class_="wide-image")["src"]
        except (AttributeError, TypeError, KeyError):
            # A lookup returned None or the attribute was absent
            print("Uh oh! Something went wrong")
            continue

        # Store in list
        image_urls.append({"title": title, "image_url": image_url})

    return image_urls


In [None]:
# Run the hemisphere scraper with the shared browser and print the returned
# list of title / image-url dictionaries
image_urls = mars_hemi(browser)
print(image_urls)