### Mission to Mars
### Scraping
### NASA Mars News

In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
import os
import pandas as pd
import time

In [2]:
#pointing to the directory where chromedriver exists
executable_path = {'executable_path': '/Users/ey/Desktop/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [4]:
# Define database and collection
db = client.nasa_db
collection = db.items

In [41]:
#visiting the page
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

In [42]:
browser.visit(url)

In [43]:
#using bs to write it into html
html = browser.html
soup = bs(html,"html.parser")

In [8]:
# Retrieve page with the requests module
# response = requests.get(url)

In [9]:
# response.status_code

In [10]:
# Examine the results, then determine element that contains sought info
# print(soup.prettify())

In [44]:
#collect the latest News Title and Paragraph Text
news_title = soup.find("div",class_="content_title").text

print(f"This is Title: {news_title}")


This is Title: Curiosity Tastes First Sample in 'Clay-Bearing Unit'


In [47]:
news_paragraph = soup.find("div", class_="article_teaser_body").text
print(f"This is Paragraph: {news_paragraph}")

This is Paragraph: This new region on Mars might reveal more about the role of water on Mount Sharp.


### JPL Mars Space Images - Featured Image

In [48]:
#Use splinter to navigate the site and find the image url for the current Featured Mars Image
#Make sure to find the image url to the full size .jpg image using BeautifulSoup
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=featured#submit"
browser.visit(image_url)
browser.is_text_present('FULL IMAGE')
browser.click_link_by_partial_text('FULL IMAGE')

In [49]:
html_jpl = browser.html

In [50]:
soup_jpl = bs(html_jpl,"html.parser" )

In [64]:
# xpath = '//*[@id="fancybox-lock"]/div/div[1]/img'

In [52]:
#Use splinter to click on the mars featured image
#to bring the full resolution image
# results = browser.find_by_xpath(xpath)
# img = results[0]
# img.click()

In [65]:
featured_image_location = soup_jpl.find("div", class_="fancybox-inner").img["src"].strip()


In [None]:
# featured_image_location = soup_jpl.find('img', class_="fancybox-image")
# print(featured_image_location)


In [None]:
# featured_image_location = soup_jpl.find('a', class_='fancybox')["scr"].get('data-fancybox-href').strip()
# featured_image_location

In [None]:
# featured_image_location = soup_jpl.find('//*[@id="fancybox-lock"]/div/div[1]/img')
# print(featured_image_location)

In [66]:
base_url = "https://www.jpl.nasa.gov/"

In [68]:
featured_image_url = base_url + featured_image_location
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA17841_ip.jpg'

### Mars Weather

In [26]:
#get mars weather's latest tweet from the website
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

In [27]:
# Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. 
# Save the tweet text for the weather report 
html_weather = browser.html
soup = bs(html_weather, "html.parser")
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)



InSight sol 134 (2019-04-12) low -98.4ºC (-145.1ºF) high -18.2ºC (-0.8ºF)
pressure at 7.30 hPapic.twitter.com/1DE7KRn8xy


In [28]:
mars_weather_text = mars_weather.replace('\n',' ').split('pic')[0]
mars_weather_text

'InSight sol 134 (2019-04-12) low -98.4ºC (-145.1ºF) high -18.2ºC (-0.8ºF) pressure at 7.30 hPa'

### Mars Facts

In [29]:
# Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet 
# including Diameter, Mass, etc.
url_facts = "https://space-facts.com/mars/"


In [30]:

tables = pd.read_html(url_facts)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [31]:
df = tables[0]
df.columns = ['Description', 'Values']
df.head()

Unnamed: 0,Description,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [32]:
# Use Pandas to convert the data to a HTML table string.

html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Values</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millenniu

In [33]:
html_table.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Values</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astron

In [34]:
df.to_html('table.html')

In [None]:
# {{ items.table|safe }}


### Mars Hemispheres

In [35]:
# Visit the USGS Astrogeology site 
url_hemisphere = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [36]:
browser.visit(url_hemisphere)

In [37]:
html_hemisphere = browser.html
soup_hemisphere = bs(html_hemisphere, "html.parser")


In [38]:
mars_hemisphere = soup_hemisphere.find("div", class_="item").text
print(mars_hemisphere)

Cerberus Hemisphere Enhancedimage/tiff 21 MBMosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired… 


In [39]:
hemisphere_titles = soup_hemisphere.find_all('div', class_='item')
hemisphere_titles

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiapa

In [40]:
hemisphere_images = []
#loop over hemispheres and save the titles and image links
for item in hemisphere_titles:
    url_hemisphere = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url_hemisphere)
    html_hemisphere = browser.html
    soup_hemisphere = bs(html_hemisphere, "html.parser")
    title = item.find("h3").text
    #navigate to specific hemisphere page
    browser.click_link_by_partial_text(title)
    time.sleep(2)

    html_hemisphere = browser.html
    soup_hemisphere = bs(html_hemisphere, "html.parser")

   #construct link to hemisphere image
    partial_img_url = soup_hemisphere.find('img', class_='wide-image')['src']
    img_url = url_hemisphere + partial_img_url

   #append hemisphere title and image url to list
    hemisphere_images.append({"title":title,"img_url=":img_url})


In [69]:
hemisphere_images

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url=': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url=': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url=': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url=': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]