# Mission to Mars

The following notebook scrapes text and images from a number of sites related to Mars exploration. 

In [1]:
# Dependencies
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

In [15]:
# Create browser parser in Chrome 
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [5]:
# URL of page to be scraped
nasa_url = "https://mars.nasa.gov/news/"
browser.visit(nasa_url)

# Retrieve page with splinter and read html
html = browser.html

# Create BeautifulSoup object and parse with html parser
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# Parse and print title and short article description
title = soup.find('div', class_='content_title')
news_title = title.find('a').text

paragraph = soup.find('div', class_='article_teaser_body')
news_p = paragraph.text

print(news_title)
print('-------------')
print(news_p)

The MarCO Mission Comes to an End
-------------
The pair of briefcase-sized satellites made history when they sailed past Mars in 2019.


## JPL Mars Space Images - Featured Image

In [7]:
# URL of page to be scraped
images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(images_url)

# Retrieve page with splinter and read html
html = browser.html

# Create BeautifulSoup object and parse with html parser
soup = BeautifulSoup(html, 'html.parser')

# Return image result
image_url = soup.find('article')["style"].split("('")[1].split("')")[0]

# Concatenate urls to display complete URL
site_url = "https://www.jpl.nasa.gov"
featured_image_url = site_url + image_url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18432-1920x1200.jpg


## Mars Weather

In [8]:
# Create browser parser in Chrome
driver = webdriver.Chrome()

# URL of page to be scraped
twitter_url = 'https://twitter.com/marswxreport?lang=en'
driver.get(twitter_url)
time.sleep(5)

# Create BeautifulSoup object and parse with html parser
body = driver.find_element_by_tag_name('body')
body.send_keys(Keys.PAGE_DOWN)

mars_twitter_html= driver.page_source
soup = BeautifulSoup(mars_twitter_html, 'html.parser')

# Parse twitter text for latest weather info
weather = soup.find_all(class_ = 'css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')

for item in weather:
    if item.text.split(" ")[0] =='InSight':
        print(item.text)
        break
time.sleep(5)    
driver.quit()

InSight sol 447 (2020-02-28) low -92.6ºC (-134.6ºF) high -11.1ºC (12.0ºF)
winds from the SSW at 5.8 m/s (13.1 mph) gusting to 20.2 m/s (45.2 mph)
pressure at 6.30 hPa


## Mars Facts

In [3]:
# URL of page to be scraped using Pandas
facts_url = "https://space-facts.com/mars/"

In [24]:
# Scrape tabular data from page with Pandas
mars_facts = pd.read_html(facts_url)
mars_df = pd.DataFrame(mars_facts[0])
mars_facts_df = mars_df.rename(columns={0: "Attribute", 1: "Value"})
mars_facts_df

Unnamed: 0,Attribute,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
# Generate HTML table from scraped Pandas dataframe
html_table_mars = mars_df.to_html(index=False)
html_table_mars

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [12]:
# Strip unwanted newlines
html_table_mars.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </

## Mars Hemispheres

In [16]:
# URL of page to be scraped 
landing_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
base_url = "https://astrogeology.usgs.gov"
browser.visit(landing_url)

# Create empty list of final output
hemisphere_image_urls = []

# Retrieve page with splinter and read html
html = browser.html

# Create BeautifulSoup object and parse with html parser
soup = BeautifulSoup(html, 'html.parser')

# Parse landing page for title of each hemisphere and add to list
container = soup.find_all('div', class_='item')
for hemi in container:
    titles = hemi.find('h3').text
    
    # Find URL of each hemisphere page 
    partial_hemi_url = hemi.find('a')['href']
    hemi_url = base_url + partial_hemi_url
    
    # Visit each URl and extract image URL
    browser.visit(hemi_url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser') 
    wrapper = soup.find('div', class_='downloads')
    images = wrapper.find('a')['href']
    
    hemi_dict = dict({"title": titles, "img_url": images})
    hemisphere_image_urls.append(hemi_dict)
        
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]