# Mission to Mars - Web Scraping
#### Submitted by : Sheetal Bongale | UT Data Analysis and Visualization | March 3, 2020


In [1]:
%reload_ext lab_black
import pandas as pd
import pprint
import requests
import urllib.parse
from bs4 import BeautifulSoup as bs
import re

from selenium import webdriver
from splinter import Browser

In [4]:
# URLs to be scraped:
NEWS_URL = "https://mars.nasa.gov/news/"
IMAGE_URL = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
WEATHER_URL = "https://twitter.com/marswxreport?lang=en"
FACTS_URL = "http://space-facts.com/mars/"
HEM_URL = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

### NASA Mars News 

In [5]:
# Collect the latest news title from NASA's page and the paragraph teaser text.
# NEWS_URL = "https://mars.nasa.gov/news/"

driver = webdriver.Firefox()
driver.get(NEWS_URL)
html = driver.page_source

soup = bs(html, "html.parser")
news_title = (soup.find("div", class_="list_text")).find("a").text

print(f"Latest News Title: {news_title}")
driver.close()

Latest News Title: NASA to Reveal Name of Its Next Mars Rover


In [6]:
teaser_url = (
    "https://mars.nasa.gov/news/" + soup.find("div", class_="list_text").a["href"]
)
teaser_url

'https://mars.nasa.gov/news//news/8619/nasa-to-reveal-name-of-its-next-mars-rover/'

In [7]:
r = requests.get(teaser_url)
html = r.text
soup = bs(html, "html.parser")
teaser = soup.find("div", class_="wysiwyg_content").find("p").text

print(f"Teaser: {teaser}")

Teaser: After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. 


### JPL Mars Space Images - Featured Image

In [10]:
# scarpe the JPL web page to scrape the current Featured Mars Image
# IMAGE_URL = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

driver = webdriver.Firefox()
driver.get(IMAGE_URL)
html = driver.page_source
img_soup = bs(html, "html.parser")

img_base_url = img_soup.find("article", {"class": "carousel_item"})["style"]

featured_image_url = re.findall("url\((.*?)\)", img_base_url)[0].replace("'", "")
featured_image_url = "https://www.jpl.nasa.gov" + featured_image_url
featured_image_title = img_soup.find("h1", class_="media_feature_title").text.strip()

driver.close()

print(f"Image URL: {featured_image_url}")
print(f"Image Title: {featured_image_title}")

Image URL: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16842-1920x1200.jpg
Image Title: Saturn's Ring 'Rain' (Artist Concept)


### Mars Weather - Twitter

In [10]:
# scrape the latest Mars weather tweet from the given twitter page
# WEATHER_URL = "https://twitter.com/marswxreport?lang=en"

r = requests.get(WEATHER_URL)
html = r.text
weather_soup = bs(html, "html.parser")
mars_weather = weather_soup.find_all("div", class_="js-tweet-text-container")
mars_weather = mars_weather[0].text[:-26]

print(f"Current Weather on Planet Mars: {mars_weather}")

Current Weather on Planet Mars: 
InSight sol 450 (2020-03-02) low -93.5ºC (-136.4ºF) high -10.4ºC (13.3ºF)
winds from the SSW at 5.5 m/s (12.4 mph) gusting to 20.6 m/s (46.1 mph)
pressure at 6.30 hPap


### Mars Facts 

In [12]:
#  use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
# FACTS_URL = "http://space-facts.com/mars/"

# Use Pandas to read the HTML
fact_table = pd.read_html(FACTS_URL)
mars_fact_table = fact_table[0]

mars_fact_table.columns = ["Description", "Value"]
mars_fact_table.set_index("Description", inplace=True)

# Convert this facts table to HTML using Pandas
mars_fact_table_html = mars_fact_table.to_html(justify="left")


mars_fact_table

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [13]:
print(mars_fact_table_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: left;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [19]:
# scrape to obtain high resolution images for each of Mar's hemispheres.
# HEM_URL = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

browser = Browser("firefox")
browser.visit(HEM_URL)

urls = [(a.text, a["href"]) for a in browser.find_by_css('div[class="description"] a')]
hemisphere_dict = []
for title, url in urls:
    product_dict = {}
    product_dict["title"] = title
    browser.visit(url)
    img_url = browser.find_by_css('img[class="wide-image"]')["src"]
    product_dict["hem_img_url"] = img_url
    hemisphere_dict.append(product_dict)

browser.quit()
hemisphere_dict

[{'title': 'Cerberus Hemisphere Enhanced',
  'hem_img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'hem_img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'hem_img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'hem_img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]