# Mission to Mars - Web Scraping
#### Submitted by : Sheetal Bongale | UT Data Analysis and Visualization | March 3, 2020


In [2]:
%reload_ext lab_black
import pandas as pd
import pprint
import requests
import urllib.parse
from bs4 import BeautifulSoup as bs
import re

from selenium import webdriver
from splinter import browser

In [3]:
# URLs to be scraped:
NEWS_URL = "https://mars.nasa.gov/news/"
IMAGE_URL = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
WEATHER_URL = "https://twitter.com/marswxreport?lang=en"
FACTS_URL = "http://space-facts.com/mars/"
HEM_URL = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

### NASA Mars News 

In [6]:
# Collect the latest news title from NASA's page and the paragraph teaser text.
# NEWS_URL = "https://mars.nasa.gov/news/"

driver = webdriver.Firefox()
driver.get(NEWS_URL)
html = driver.page_source

soup = bs(html, "html.parser")
news_title = (soup.find("div", class_="list_text")).find("a").text

print(f"Latest News Title: {news_title}")
driver.close()

Latest News Title: The MarCO Mission Comes to an End


In [7]:
teaser_url = (
    "https://mars.nasa.gov/news/" + soup.find("div", class_="list_text").a["href"]
)
teaser_url

'https://mars.nasa.gov/news//news/8408/the-marco-mission-comes-to-an-end/'

In [8]:
r = requests.get(teaser_url)
html = r.text
soup = bs(html, "html.parser")
teaser = soup.find("div", class_="wysiwyg_content").find("p").text

print(f"Teaser: {teaser}")

Teaser: The pair of briefcase-sized satellites made history when they sailed past Mars in 2019.


### JPL Mars Space Images - Featured Image

In [10]:
# scarpe the JPL web page to scrape the current Featured Mars Image
# IMAGE_URL = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

driver = webdriver.Firefox()
driver.get(IMAGE_URL)
html = driver.page_source
img_soup = bs(html, "html.parser")

img_base_url = img_soup.find("article", {"class": "carousel_item"})["style"]

featured_image_url = re.findall("url\((.*?)\)", img_base_url)[0].replace("'", "")
featured_image_url = "https://www.jpl.nasa.gov" + featured_image_url
featured_image_title = img_soup.find("h1", class_="media_feature_title").text.strip()

driver.close()

print(f"Image URL: {featured_image_url}")
print(f"Image Title: {featured_image_title}")

Image URL: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16842-1920x1200.jpg
Image Title: Saturn's Ring 'Rain' (Artist Concept)


### Mars Weather - Twitter

In [23]:
# scrape the latest Mars weather tweet from the given twitter page
# WEATHER_URL = "https://twitter.com/marswxreport?lang=en"

r = requests.get(WEATHER_URL)
html = r.text
soup = bs(html, "html.parser")
mars_weather = soup.find_all("div", class_="js-tweet-text-container")
mars_weather = mars_weather[0].text

print(f"Current Weather on Planet Mars: {mars_weather}")

Current Weather on Planet Mars: 
InSight sol 448 (2020-02-29) low -94.1ºC (-137.3ºF) high -8.3ºC (17.0ºF)
winds from the SSW at 5.5 m/s (12.4 mph) gusting to 19.9 m/s (44.6 mph)
pressure at 6.30 hPapic.twitter.com/QTKm4klPNr



### Mars Facts 

In [21]:
#  use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
# FACTS_URL = "http://space-facts.com/mars/"

fact_table = pd.read_html(FACTS_URL)
mars_fact_table = fact_table[0]

mars_fact_table.columns = ["Description", "Value"]
mars_fact_table.set_index("Description", inplace=True)

# Use Pandas to convert the data to a HTML table string.
mars_fact_table_html = mars_fact_table.to_html(
    header=False, index=False, justify="left"
)
# mars_fact_table_html = mars_fact_table_html.replace("\n", "")

mars_fact_table

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [22]:
print(mars_fact_table_html)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [4]:
# scrape to obtain high resolution images for each of Mar's hemispheres.
# HEM_URL = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

mars_hemispheres = requests.get(HEM_URL).text
astro_soup = bs(mars_hemispheres, "html.parser")

In [6]:
hemisphere_dict = []
hemisphere_dict = [
    {
        "Title": e.text.strip(" Enhanced"),
        "HEM_URL": ("https://astrogeology.usgs.gov" + e["href"]),
    }
    for e in astro_soup.find_all(class_="itemLink product-item")
]
hemisphere_dict

[{'Title': 'Cerberus Hemispher',
  'HEM_URL': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'},
 {'Title': 'Schiaparelli Hemispher',
  'HEM_URL': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'Title': 'Syrtis Major Hemispher',
  'HEM_URL': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'Title': 'Valles Marineris Hemispher',
  'HEM_URL': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'}]