In [1]:
# Dependencies

# Web scraping dependencies
from bs4 import BeautifulSoup

# Asynchronous scraping dependencies
# Pausing to allow for page to load
import time
from selenium import webdriver

# Asynchronous control (clicking, etc.)
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

# Database deployment dependencies
import pymongo

# Scrape Latest News

In [2]:
# Create soup object
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
# Chromedriver set-up
executable_path = {'executable_path': 'chromedriver.exe'}
browser = webdriver.Chrome(**executable_path)
browser.get(url)

# 2 second time delay is so that the page can load and all information can be scraped
time.sleep(2)

# Scrape the html on the site after the timer is done
html = browser.page_source
soup = BeautifulSoup(html, 'html.parser')

# find the first div class that contains news information 
# (this will be the latest news for the website)
latest_news_class = soup.find("div", class_ = "list_text")

# close the browser
browser.close()

In [3]:
# Loop through returned results
result_list = []
for result in latest_news_class:
    # Error handling
    try:
        # Identify and return title of listing
        text_result = result.text
        
        # Print results only if title, price, and link are available
        if (text_result):
            result_list.append(text_result)
    except AttributeError as e:
        print(e)

result_list

['April 11, 2019',
 "Curiosity Tastes First Sample in 'Clay-Bearing Unit'",
 'This new region on Mars might reveal more about the role of water on Mount Sharp.']

# Scrape Image of the Day

In [18]:
url2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Create browser object from Splinter library
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url2)

# Click on the button that says FULL IMAGE to scrape the image
browser.click_link_by_partial_text('FULL IMAGE')

# Pause to let the browser load
time.sleep(4)

# Load html
html = browser.html

# Close the browser
browser.quit()

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
link = soup.find("div", class_ = "fancybox-inner fancybox-skin fancybox-dark-skin fancybox-dark-skin-open")

In [19]:
for result in link:
    try:
        partial_url = result.get('src')
        if(partial_url):
            featured_image_url = "https://www.jpl.nasa.gov" + partial_url
    except AttributeError as e:
        print(e)

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19674_ip.jpg'

# Scrape latest mars weather tweet

In [32]:
url3 = "https://twitter.com/marswxreport?lang=en"

# Create browser object from Splinter library
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url3)

# Pause to let the browser load
time.sleep(4)

# Load html
html = browser.html

# Close the browser
browser.quit()

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
link = soup.find("div", class_ = "js-tweet-text-container")

In [36]:
for result in link:
    try:
        tweet = result.text
        if(tweet):
            mars_weather = tweet
    except AttributeError as e:
        print(e)
        
mars_weather

'NavigableString' object has no attribute 'text'
'NavigableString' object has no attribute 'text'


'InSight sol 137 (2019-04-16) low -97.0ºC (-142.7ºF) high -15.9ºC (3.4ºF)\nwinds from the SW at 4.3 m/s (9.7 mph) gusting to 12.4 m/s (27.7 mph)\npressure at 7.30 hPapic.twitter.com/jhaMb7q9gy'

# Scrape mars data

In [43]:
import pandas as pd
url = "https://space-facts.com/mars/"
tables = pd.read_html(url , encoding= "utf-8")
mars_data_df = tables[0]

In [46]:
mars_data_df.columns = ['Description','Value']
mars_data_df.set_index('Description', inplace=True)

mars_data_df

# save the dataframe to an html file
mars_data_df.to_html('table.html')

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Scrape Hemisphere Images

In [57]:
url4 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

#
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url4)
browser.click_link_by_partial_text('Cerberus Hemisphere Enhanced')
html = browser.html
browser.quit()
soup = BeautifulSoup(html, 'html.parser')

#
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url4)
browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
html = browser.html
browser.quit()
soup = BeautifulSoup(html, 'html.parser')

#
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url4)
browser.click_link_by_partial_text('Syrtis Major Hemisphere Enhanced')
html = browser.html
browser.quit()
soup = BeautifulSoup(html, 'html.parser')

#
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url4)
browser.click_link_by_partial_text('Valles Marineris Hemisphere Enhanced')
html = browser.html
browser.quit()
soup = BeautifulSoup(html, 'html.parser')