# Web Scraping Homework: Mission to Mars

## Import libraries and run configuration

In [1]:
import pandas as pd
import time
import requests
import pymongo
import numpy as np
import urllib
import cv2

from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup as bs


In [2]:
# Initialize browser
def init_browser(executable_path="/usr/local/bin/chromedriver", headless=False):
    """Start a splinter-driven Chrome browser.

    Args:
        executable_path: Filesystem path of the chromedriver binary handed
            to splinter. The default is the location this notebook was
            originally written against; pass a different path on machines
            where chromedriver lives elsewhere.
        headless: When True, Chrome is started without a visible window.
            Defaults to False, matching the original behaviour.

    Returns:
        The splinter ``Browser`` instance. The caller owns it and should
        call ``quit()`` on it once the page source has been read.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)


## NASA Mars News

In [3]:
# Fetch the rendered NASA Mars news page and parse it into `soup`.
nasa_mars_news_url = 'https://mars.nasa.gov/news'

nasa_news_browser = init_browser()
try:
    nasa_news_browser.visit(nasa_mars_news_url)

    # Fixed pause before reading the page source (presumably to let the
    # script-rendered article list appear -- TODO confirm 1s is enough).
    time.sleep(1)

    html = nasa_news_browser.html
finally:
    # Always close the Chrome session, even if the visit fails; otherwise
    # every run of this cell leaves a browser window/process behind.
    nasa_news_browser.quit()

soup = bs(html, 'html.parser')


In [4]:
# Collect every <li> card inside the news list element.
news_gallery = soup.find('ul', attrs={'class': 'item_list'})
news_cards = news_gallery.find_all('li')

# Only the first card in the list is used for the headline and teaser.
first_news_card = news_cards[0]
news_title = first_news_card.find('h3').text
news_description = first_news_card.find(
    'div', attrs={'class': 'rollover_description_inner'}
).text

# Bundle both strings under the keys the rest of the notebook expects.
mars_news = dict(news_title=news_title, news_description=news_description)

In [5]:
# Show the headline and its teaser on separate lines.
print(news_title, news_description, sep='\n')

NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet
The agency's Mars 2020 mission is on its way. It will land at Jezero Crater in about seven months, on Feb. 18, 2021. 


## JPL Mars Space Images

In [6]:
# Fetch the rendered JPL Mars space-images page and parse it into `soup`.
jpl_mars_space_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

jpl_mars_space_images_browser = init_browser()
try:
    jpl_mars_space_images_browser.visit(jpl_mars_space_images_url)

    # Fixed pause before reading the page source (presumably to let the
    # page finish rendering -- TODO confirm 1s is enough).
    time.sleep(1)

    html = jpl_mars_space_images_browser.html
finally:
    # Always close the Chrome session, even if the visit fails; otherwise
    # every run of this cell leaves a browser window/process behind.
    jpl_mars_space_images_browser.quit()

soup = bs(html, 'html.parser')


In [7]:
# Find the carousel article and, inside it, the button element that
# carries the link to the featured image's detail page.
featured_image_article_element = soup.find('article', attrs={'class': 'carousel_item'})
image_name_element = featured_image_article_element.find('a', attrs={'class': 'button'})

# The image id is the second piece of the data-link value when split on '='.
partial_image_name = image_name_element['data-link']
partial_image_name_parts = partial_image_name.split('=')
image_name = partial_image_name_parts[1]

# Drop the id into the large-size image URL pattern.
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/{}_hires.jpg'.format(image_name)


In [8]:
# Bare expression: the notebook displays the assembled image URL as this cell's output.
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA02570_hires.jpg'

## Mars Weather

In [9]:
# Fetch the rendered Mars weather Twitter timeline and parse it into `soup`.
twitter_mars_weather_url = 'https://twitter.com/marswxreport'

twitter_browser = init_browser()
try:
    twitter_browser.visit(twitter_mars_weather_url)

    # Fixed pause before reading the page source (presumably to let the
    # script-rendered timeline appear -- TODO confirm 1s is enough).
    time.sleep(1)

    html = twitter_browser.html
finally:
    # Always close the Chrome session, even if the visit fails; otherwise
    # every run of this cell leaves a browser window/process behind.
    twitter_browser.quit()

soup = bs(html, 'html.parser')


In [10]:
# Exact class attribute of the <div> that is searched for below.
# NOTE(review): these look like auto-generated class names and may change
# whenever the site is redeployed -- verify before relying on them.
tweet_div_classes = 'css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0'

# First matching <div>; the weather text is read from its first <span>.
twitter_card = soup.find('div', attrs={'class': tweet_div_classes})
mars_weather = twitter_card.find('span').text


In [11]:
# Bare expression: the notebook displays the scraped weather text as this cell's output.
mars_weather

'InSight sol 597 (2020-08-01) low -91.0ºC (-131.8ºF) high -16.9ºC (1.6ºF)\nwinds from the WNW at 8.0 m/s (17.9 mph) gusting to 20.2 m/s (45.1 mph)\npressure at 7.90 hPa'

## Mars Facts

In [12]:
# Let pandas pull every HTML table it can find on the facts page.
mars_facts_url = "https://space-facts.com/mars/"
tables = pd.read_html(mars_facts_url)

# Keep the first table and give its two positional columns readable names.
table_df = tables[0].rename(columns={0: "Fact", 1: "Value"})
table_df


Unnamed: 0,Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# Render the facts frame as an HTML <table> string: no index column,
# header cells centred.
mars_facts_table = table_df.to_html(justify='center', index=False)

# Print the raw markup so it can be inspected as plain text.
print(mars_facts_table)


<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: center;">
      <th>Fact</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres

In [14]:
# Fetch the rendered USGS hemisphere search results and parse them into `soup`.
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

mars_hemispheres_browser = init_browser()
try:
    mars_hemispheres_browser.visit(mars_hemispheres_url)

    # Fixed 10s pause before reading the page source (presumably this page
    # is slow to render its result list -- TODO confirm the delay needed).
    time.sleep(10)

    html = mars_hemispheres_browser.html
finally:
    # Always close the Chrome session, even if the visit fails; otherwise
    # every run of this cell leaves a browser window/process behind.
    mars_hemispheres_browser.quit()

soup = bs(html, 'html.parser')


In [15]:
# Container holding the search hits, then one <div class="item"> per hit.
results = soup.find('div', attrs={'class': 'collapsible results'})
items_list = results.find_all('div', attrs={'class': 'item'})

# Display the raw result elements as the cell output.
items_list


[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/08eac6e22c07fb1fe72223a79252de20_schiapa

In [16]:
# Build one {"title", "image_url"} record per hemisphere search result.
mars_hemispheres = []

for item in items_list:
    # Each result carries a site-relative link to its detail page.
    link_element = item.find('a', class_ = 'itemLink product-item')
    image_link = link_element['href']
    full_link = "https://astrogeology.usgs.gov" + image_link

    # Bound the wait and fail loudly on an HTTP error instead of parsing
    # an error page as if it were the detail page.
    response = requests.get(full_link, timeout=30)
    response.raise_for_status()

    # Parse into a dedicated name. Rebinding the notebook-wide `soup` here
    # would replace the search-results page, so re-running the cell that
    # builds `items_list` from `soup` would then fail.
    detail_soup = bs(response.text, 'html.parser')

    content_section = detail_soup.find('section', class_ = 'block metadata')
    title = content_section.find('h2').text

    # The first link inside the downloads box is used as the image URL.
    downloads_element = detail_soup.find('div', class_ = 'downloads')
    images_links = downloads_element.find_all('a')
    link_to_full_hemisphere_pic = images_links[0]['href']

    mars_hemispheres.append({
        "title" : title,
        "image_url" : link_to_full_hemisphere_pic
    })

mars_hemispheres

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [17]:
# Testing scrape class functions

# def scrape():
#     scrape_data = {
#         "nasa_mars_news": mars_news,
#         "jpl_mars_space_images": featured_image_url,
#         "mars_weather": mars_weather,
#         "mars_facts": mars_facts_table,
#         "mars_hemisphere": mars_hemispheres
#     }

#     return scrape_data

# def persist_mars_data(mars_data):
#     conn = "mongodb://localhost:27017"
#     client = pymongo.MongoClient(conn)
#     mars_db = client.mars
#     mission_data = mars_db.mission_to_mars
#     mission_data.insert_one(
#         mars_data
#     )


# def load_mars_data():
#     conn = "mongodb://localhost:27017"
#     client = pymongo.MongoClient(conn)
#     mars_db = client.mars
#     mission_data = mars_db.mission_to_mars
    
#     return mission_data.find_one()


In [18]:
# mars_data_dict = scrape()
# persist_mars_data(mars_data_dict)
# mars_data_loaded = load_mars_data()
