In [1]:
# Import dependencies
import time
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# To create a splinter browser that drives Chrome through the chromedriver executable
def init_browser():
    """Start a visible (non-headless) Chrome session through splinter.

    Returns:
        The splinter ``Browser`` object created for "chrome" with the driver
        executable given as "chromedriver".
    """
    driver_kwargs = dict(executable_path="chromedriver", headless=False)
    return Browser("chrome", **driver_kwargs)
    

## NASA Mars News

In [3]:
# To scrape data for Mars News
def scrape_news():
    """Scrape the first news item from the NASA Mars News page.

    Opens a browser via ``init_browser()``, loads the news page, and reads
    the title link text and teaser text of the first ``div.list_text``
    element found in the rendered HTML.

    Returns:
        dict: ``{'title': <title text>, 'text': <teaser text>}``.

    Raises:
        AttributeError: if the page does not contain the expected elements
            (``find`` returns ``None``). The browser is still closed.
    """
    browser = init_browser()
    try:
        url = "https://mars.nasa.gov/news/"
        browser.visit(url)

        # Pause so the page can finish loading before the HTML is read.
        time.sleep(2)

        # To scrape page into soup
        soup = bs(browser.html, "html.parser")

        # To get the latest news title and paragraph text
        # (presumably the first 'list_text' block is the newest article).
        article = soup.find('div', class_='list_text')
        news_title = article.find('a').text
        news_p = article.find('div', class_='article_teaser_body').text
    finally:
        # Always close the browser, even when scraping fails, so no browser
        # session is left running after an error.
        browser.quit()

    return {'title': news_title, 'text': news_p}

## JPL Mars Space Images 

In [5]:
# To scrape images 

def scrape_JPL_image():
    """Scrape the URL of the featured Mars image from the JPL space images page.

    Opens a browser via ``init_browser()``, loads the JPL Mars image search
    page, and extracts a path from the ``style`` attribute of the first
    ``<article>`` inside ``div.carousel_items``.

    Returns:
        str: ``"https://www.jpl.nasa.gov"`` concatenated with the extracted path.

    Raises:
        AttributeError: if the carousel or its article is not found.
        KeyError: if the article has no ``style`` attribute.
        IndexError: if the ``style`` value contains no single-quoted text.
        In every case the browser is still closed.
    """
    browser = init_browser()
    try:
        # To visit the url to grab the jpl space images
        nasa_url = "https://www.jpl.nasa.gov"
        jpl_query = "/spaceimages/?search=&category=Mars"
        browser.visit(nasa_url + jpl_query)

        # Pause so the page can finish loading before the HTML is read.
        time.sleep(1)

        # To scrape the page into the soup
        soup = bs(browser.html, "html.parser")

        # To find the image url for the current featured mars image.
        # The path is taken as the text between the first pair of single
        # quotes in the style attribute (presumably a CSS url('...') value).
        article = soup.find('div', class_='carousel_items').find('article')
        featured_image_url = article['style'].split("'")[1]
    finally:
        # Always close the browser, even when scraping fails.
        browser.quit()

    return nasa_url + featured_image_url

## Mars Facts

In [6]:
# To get the facts about mars and scrape into a table

def scrape_mars_facts():
    """Scrape the first HTML table from the space-facts Mars page.

    Opens a browser via ``init_browser()``, loads the page, parses all
    tables in the rendered HTML with pandas, and re-renders the first one
    as an HTML ``<table>`` string.

    Returns:
        str: HTML for the first table, rendered without index or header row,
        with ``border=1`` and the CSS classes ``table``, ``table-responsive``
        and ``table-striped``.

    Raises:
        ValueError: raised by ``pd.read_html`` when the page has no tables.
    """
    browser = init_browser()
    try:
        # To visit the url to scrape the facts table
        url = "https://space-facts.com/mars/"
        browser.visit(url)

        # Pause so the page can finish loading before the HTML is read.
        time.sleep(1)

        html = browser.html
    finally:
        # Always close the browser, even when loading the page fails.
        browser.quit()

    # To read html using pandas. The markup is wrapped in StringIO because
    # passing a literal HTML string to read_html is deprecated in pandas 2.1+.
    tables = pd.read_html(StringIO(html))

    # To scrape the table containing facts about the planet including Diameter, Mass, etc.
    # read_html already returns DataFrames, so the first one is used directly.
    facts_df = tables[0]
    table_html = facts_df.to_html(index=False, border=1, header=False,
                                  classes=["table", "table-responsive", "table-striped"],
                                  justify='center')

    return table_html

## Mars Hemispheres 

In [9]:
# To scrape data about Mars Hemispheres

def scrape_mars_hemispheres():
    """Scrape titles and image links for the Mars hemispheres from the USGS site.

    Opens a browser via ``init_browser()``, loads the USGS Astrogeology search
    results, and for every ``div.item`` result clicks through to its detail
    page, reads the first link in ``div.downloads``, and navigates back.

    Returns:
        list[dict]: one ``{'title': <h3 text>, 'img_url': <href>}`` dict per
        result, in page order.

    Raises:
        AttributeError: if an expected element (``h3`` or ``div.downloads``)
            is missing. The browser is still closed.
    """
    browser = init_browser()
    try:
        # To visit the USGS site
        url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
        browser.visit(url)

        # Pause so the page can finish loading before the HTML is read.
        time.sleep(1)

        # To scrape page into soup
        soup = bs(browser.html, "html.parser")

        # To find all items
        items = soup.find_all('div', class_='item')

        # To find the titles and image urls for the Hemispheres
        hemisphere_image_urls = []
        for item in items:
            # To find title of this item
            title = item.find('h3').text

            # To scrape by clicking the link for each item
            browser.find_by_text(title).click()
            detail_soup = bs(browser.html, "html.parser")

            # To find the link for the full size image
            # NOTE: the first link is to get the jpg
            # NOTE2: the second link is to get the tif(full size)
            download_links = detail_soup.find('div', class_="downloads").find_all('a')
            jpg_url = download_links[0]['href']

            # To append the title and image url to the list
            hemisphere_image_urls.append({'title': title, 'img_url': jpg_url})

            # To go back to the USGS Astrogeology search results
            browser.back()
    finally:
        # To quit the browser; done in `finally` so it also closes when a
        # page is missing an expected element.
        browser.quit()

    return hemisphere_image_urls

In [10]:
# To scrape data for mars hemispheres.
# The call is the last expression in the cell, so the returned list of
# {'title', 'img_url'} dictionaries is displayed as the cell output.
scrape_mars_hemispheres()

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]