## Mission to Mars

In [1]:
# The purpose of this exercise is to build a web application that scrapes various websites for data related 
# to the Mission to Mars and displays the information in a single HTML page.
# Web scraping automatically extracts data and presents it in a format you can easily make sense of.

In [2]:
# MODULE IMPORT
# "splinter" is an open source tool for testing web applications using Python. It lets you automate browser actions, 
# such as visiting URLs and interacting with their items.  To use splinter you need create a "Browser" instance.
# "BeautifulSoup" is a Python library for pulling data out of HTML and XML files. It works with your favorite parser 
# to provide idiomatic ways of navigating, searching, and modifying the parse tree. It commonly saves programmers 
# hours or days of work.
# "time" is imported to insert some time delays.

import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import time

# import requests
# import tweepy

In [3]:
# WebDriver is an open source tool for automated testing of webapps across many browsers. It provides capabilities for
# navigating to web pages, user input, JavaScript execution, and more.  ChromeDriver is a standalone server which 
# implements WebDriver's wire protocol for Chromium. 
# The code below gives the path to ChromeDriver on your computer.  We will use this path right below.

!which chromedriver

/c/webdrivers/chromedriver


## Access the NASA mars news site

In [4]:
# URL of the mars nasa news site.

URL = "https://mars.nasa.gov/news/"

In [5]:
# Set the executable path to ChromeDriver.

execPath = {"executable_path": "/webdrivers/chromedriver"}

In [6]:
# Initialize the Chrome Browser in splinter, by using the executable path just created before.

browser = Browser("chrome", **execPath)

In [7]:
# Visit the site.

browser.visit(URL)

In [8]:
# Convert the browser html into a BeautifulSoup object.

HTML = browser.html
newsObj = BeautifulSoup(HTML,"html.parser")

In [9]:
# Quit the browser, as otherwise you are going to have all these internet windows open.

browser.quit()

In [10]:
# Using the BeautifulSoup object, find the first title and print it to verify that it works.

newsTitle = newsObj.find("div","content_title","a").text
newsTitle

'Bound for Mars: Countdown to First Interplanetary Launch from California'

In [11]:
# Similarly, find the first paragraph and print it.

newsParagraph = newsObj.find('div', 'rollover_description_inner').text
newsParagraph

'On May 5, millions of Californians may witness the historic first interplanetary launch from America’s West Coast.'

## Jet Propulsion Laboratory Mars Images

In [12]:
# URL of the JPL Mars Image site.

URL = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [13]:
# Set the executable path to ChromeDriver.

execPath = {"executable_path": "/webdrivers/chromedriver"}

In [14]:
# Initialize the Chrome Browser in splinter, by using the executable path just created before.

browser = Browser("chrome",**execPath)

In [15]:
# Visit the site.

browser.visit(URL)

In [16]:
# Find the full image button.

fullImage = browser.find_by_id("full_image")

In [17]:
# Click on the button.

fullImage.click()

In [18]:
# Find the "more info" button.

moreInfo = browser.find_link_by_partial_text("more info")

In [19]:
# Click on the button.

moreInfo.click()

In [20]:
# Convert the browser html into a BeautifulSoup object.

HTML = browser.html
imageObj = BeautifulSoup(HTML,"html.parser")

In [21]:
# Quit the browser, as otherwise you are going to have all these internet windows open.

browser.quit()

In [22]:
# Using the BeautifulSoup object, find the first image URL and print it to verify that it works.

imageURL = imageObj.find("figure",class_="lede").find("img")["src"]
imageURL

'/spaceimages/images/largesize/PIA16153_hires.jpg'

In [23]:
# Use the first URL to create a generic URL.

genericURL = f"https://www.jpl.nasa.gov{imageURL}"
genericURL

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16153_hires.jpg'

## Mars Weather

In [24]:
# URL of the twitter Mars weather site.

URL = "https://twitter.com/marswxreport?lang=en"

In [25]:
# Set the executable path to ChromeDriver.

execPath = {"executable_path": "/webdrivers/chromedriver"}

In [26]:
# Initialize the Chrome Browser in splinter, by using the executable path just created before.

browser = Browser("chrome",**execPath)

In [27]:
# Visit the site.

browser.visit(URL)

In [28]:
# Convert the browser html into a BeautifulSoup object.

HTML = browser.html
tweetObj = BeautifulSoup(HTML,"html.parser")

In [29]:
browser.quit()

In [30]:
# Find a tweet with the data-name "Mars Weather".

MarsWeatherTweet = tweetObj.find("div", attrs={"class":"tweet","data-name":"Mars Weather"})

In [31]:
# Search within the tweet for the paragraph tag containing the tweet text.

MarsWeather = MarsWeatherTweet.find('p', 'tweet-text').get_text()
MarsWeather

'Sol 2019 (April 11, 2018), Sunny, high -6C/21F, low -75C/-103F, pressure at 7.18 hPa, daylight 05:27-17:21'

## Mars Hemisphere

In [43]:
# URL of the Mars hemisphere site.

URL = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [44]:
# Set the executable path to ChromeDriver.

execPath = {"executable_path": "/webdrivers/chromedriver"}

In [45]:
# Initialize the Chrome Browser in splinter, by using the executable path just created before.

browser = Browser("chrome",**execPath)

In [46]:
# Visit the site.

browser.visit(URL)

In [47]:
# List, initially empty, of hemisphere image URLs.

hemisphereImageURLs=[]

In [48]:
# Convert the browser html into a BeautifulSoup object.

HTML = browser.html
hemisphereObj = BeautifulSoup(HTML,"html.parser")

In [49]:
browser.quit()

In [50]:
# Using the BeautifulSoup object, find the first URL, append it to the initially empty list "hemisphereImageURLs" and 
# print it to verify that it works.

vallesLink = hemisphereObj.find("div","downloads").a["href"]
vallesMarineris = {"title":"Valles Marineris Hemisphere","img_url":vallesLink}
hemisphereImageURLs.append(vallesMarineris)
vallesMarineris

AttributeError: 'NoneType' object has no attribute 'a'

## Mars Facts

In [40]:
import pandas as pd
df1 = pd.read_html('http://space-facts.com/mars/')[0]
df1.columns=['description', 'value']
df1.set_index('description', inplace=True)
df1

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [41]:
df1.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

In [42]:
browser.quit()