# Mission to Mars

In this assignment, you will build a web application that scrapes various websites for data related to the Mission to Mars and displays the information in a single HTML page. The following outlines what you need to do.

In [1]:
#Import dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
import pymongo
from splinter import Browser
import sys
import json

In [2]:
# Start a visible (non-headless) Chrome session through splinter, driven by
# the chromedriver executable named below. Later cells reuse `browser`.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser(
    'chrome',
    headless=False,
    **executable_path
)

# Step 1 - Scraping

In [3]:
# Create a MongoDB client pointed at the server on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [4]:
# Select the `mars_db` database and, inside it, the `news` collection
db = client['mars_db']
collection = db['news']

# NASA

In [8]:
# URL to be scraped
url = "https://mars.nasa.gov/news/"

# Retrieve the page with the requests module. The timeout keeps this cell from
# hanging indefinitely if the server never answers, and raise_for_status()
# surfaces HTTP errors here instead of letting an error page be parsed as if
# it were the news listing.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create a BeautifulSoup object. The parser is named explicitly
# ('html.parser', as in the browser-based cells of this notebook) so the
# result does not depend on which optional parser libraries are installed.
soup_nasa = BeautifulSoup(response.text, 'html.parser')

In [10]:
# Locate the first headline <div> and the first teaser <div> in the parsed
# page, then keep only their text content.
title_div = soup_nasa.find('div', class_="content_title")
nasa_title = title_div.get_text()
teaser_div = soup_nasa.find('div', class_='rollover_description_inner')
nasa_text = teaser_div.get_text()

In [11]:
# Remove every newline character from the headline and the teaser
nasa_title = ''.join(nasa_title.split('\n'))
nasa_text = ''.join(nasa_text.split('\n'))

In [12]:
# Show the cleaned headline and teaser, one per line
print(nasa_title, nasa_text, sep='\n')

NASA's Curiosity Mars Rover Finds a Clay Cache
The rover recently drilled two samples, and both showed the highest levels of clay ever found during the mission.


# JPL NASA

In [18]:
# JPL space-images search page with the category filter set to Mars;
# open it in the splinter-controlled browser.
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

In [19]:
# Click the `a.button` link on the current page (splinter forwards the click
# to the first match). Presumably this opens the full-size featured image,
# since a later cell looks for `img.fancybox-image` -- confirm on the page.
full_image_button = browser.find_by_css('a.button')
full_image_button.click()

In [20]:
# The image opened by the preceding click may not be in the DOM immediately.
# Wait (up to 10 s) for it so that a non-interactive "Run All" does not
# snapshot the page before the <img class="fancybox-image"> element exists.
browser.is_element_present_by_css('img.fancybox-image', wait_time=10)

# Parse the browser's current HTML
jpl_soup = BeautifulSoup(browser.html, 'html.parser')

In [21]:
# Read the `src` of the fancybox image and prefix it with the JPL host to
# form the complete URL (the output below shows `src` as a site-relative path).
jpl_base_url = "https://www.jpl.nasa.gov"
fancybox_img = jpl_soup.find('img', class_='fancybox-image')
image = fancybox_img['src']
featured_image_url = jpl_base_url + image
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17175_ip.jpg'

# Mars Weather

In [22]:
# URL to be scraped
twitter_url = "https://twitter.com/marswxreport?lang=en"

# Retrieve the page with the requests module. The timeout keeps this cell from
# hanging indefinitely if the server never answers, and raise_for_status()
# surfaces HTTP errors here instead of letting an error page be parsed as if
# it were the timeline.
response = requests.get(twitter_url, timeout=30)
response.raise_for_status()

# Create a BeautifulSoup object. The parser is named explicitly
# ('html.parser', as in the browser-based cells of this notebook) so the
# result does not depend on which optional parser libraries are installed.
soup_twitter = BeautifulSoup(response.text, 'html.parser')

In [23]:
# Retrieve the text of the first tweet container on the page
mars_weather = soup_twitter.find('div', class_="js-tweet-text-container").text
mars_weather = mars_weather.replace('\n', '')

# Drop the trailing picture link. Split on the link's host prefix rather than
# on one specific tweet's full link, so the cleanup still works when a
# different tweet (with a different picture id) is the first one on the page.
# If no picture link is present, the text is left unchanged.
extra = 'pic.twitter.com'
mars_weather = mars_weather.split(extra, 1)[0]
mars_weather

'InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)pressure at 7.70 hPa'

# Mars Facts

In [24]:
# Page whose HTML tables are read in the next cell
facts_url = "https://space-facts.com/mars/"

In [25]:
# Let pandas' `read_html` fetch the page and return its tables
tables = pd.read_html(io=facts_url)

In [26]:
# Take the first table from the page, name its three columns, and keep only
# the concept and Mars columns in `df`.
mars_earth_table = tables[0]
mars_earth_table.columns = ['Concept', 'Mars', 'Earth']
df = mars_earth_table.drop('Earth', axis=1)
df

Unnamed: 0,Concept,Mars
0,Diameter:,"6,779 km"
1,Mass:,6.39 × 10^23 kg
2,Moons:,2
3,Distance from Sun:,"227,943,824 km"
4,Length of Year:,687 Earth days
5,Temperature:,-153 to 20 °C


In [27]:
# Render the facts DataFrame as an HTML <table> string.
# NOTE(review): `tables` previously held the output of `pd.read_html`; from
# here on it is an HTML string, so re-running the "Create DataFrame" cell
# after this one will fail. Kept under the same name for compatibility.
mars_facts_html = df.to_html()
tables = mars_facts_html

# Mars Hemispheres

In [51]:
# USGS Astrogeology search results for enhanced Mars hemisphere images:
# load the page in the browser, then parse the HTML it currently holds.
hemis_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemis_url)
hemis_html = browser.html
hemis_soup = BeautifulSoup(hemis_html, 'html.parser')

In [52]:
# Start with an empty list for the cleaned titles and collect every <h3>
# element on the results page (each holds a hemisphere name).
headers = list()
titles = hemis_soup.find_all(name='h3')
titles

[<h3>Cerberus Hemisphere Enhanced</h3>,
 <h3>Schiaparelli Hemisphere Enhanced</h3>,
 <h3>Syrtis Major Hemisphere Enhanced</h3>,
 <h3>Valles Marineris Hemisphere Enhanced</h3>]

In [53]:
# Keep only the text of each <h3>. `headers` is rebuilt from scratch here
# rather than appended to, so re-running this cell cannot accumulate
# duplicate titles and mispair them with the image URLs later on.
headers = [title.text for title in titles]
# NOTE(review): this prints only the first three titles -- confirm whether
# all of them were meant to be shown.
print(headers[0:3])

['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced']


In [54]:
# Hard-coded `full.jpg` URLs, one per hemisphere, listed in the same order as
# the titles found above (Cerberus, Schiaparelli, Syrtis Major, Valles Marineris).
one, two, three, four = (
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
)

In [57]:
# Gather the four image URLs into one list, preserving their order
images = list((one, two, three, four))
images

['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [61]:
# Pair each hemisphere title with its image URL, producing one
# {'title': ..., 'img_url': ...} dict per hemisphere.
hemisphere_image_urls = []
for title_text, img_link in zip(headers, images):
    hemisphere_image_urls.append({'title': title_text, 'img_url': img_link})
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]