# Module Installation for Scraping and importing dependencies

In [1]:
!pip install selenium
!pip install splinter
!pip install shutil

# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser
import tweepy
import time
import pandas as pd
import regex as re

Collecting shutil


  Could not find a version that satisfies the requirement shutil (from versions: )
No matching distribution found for shutil


## Acquistion of NASA Mars News using Beautiful Soup

In [2]:
# Mars News URL
url = "https://mars.nasa.gov/news/"

# Retrieve page with the requests module
html = requests.get(url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html.text, 'html.parser')

# Get title & description
news_title = soup.find('div', 'content_title', 'a').text
news_p = soup.find('div', 'rollover_description_inner').text
#print(soup.prettify())

In [3]:
print(news_title)
print(news_p)



Opportunity Hunkers Down During Dust Storm



It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home. 



## Acquistion of Mars featured image of the day from JPL Mars Space Library

In [4]:
# JPL Mars URL
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Setting up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path)
browser.visit(url)

# Moving through the pages
time.sleep(5)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)
browser.click_link_by_partial_text('more info')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Get featured image
results = soup.find('article')
extension = results.find('figure', 'lede').a['href']
link = "https://www.jpl.nasa.gov"
featured_image_url = link + extension 
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16192_hires.jpg


In [5]:
# Use the requests library to download and save the image from the `img_url` above(RSH)
import requests
import shutil
response = requests.get(featured_image_url, stream=True)
with open('img.png', 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)

# Display the image with IPython.display (RSH)
from IPython.display import Image
Image(url='img.png')

## Acquistion of Mars Weather Report

In [6]:
 # set path to mars weather report Twitter page
weather_url = "https://twitter.com/MarsWxReport?lang=en"
browser.visit(weather_url)
time.sleep(2)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# find the paragraph tab, 
mars_soup = soup.find_all("p", class_="TweetTextSize")

weather_list = []

for weather in mars_soup:
    if re.search("Sol ", weather.text):
        weather_list.append(weather.text)

# pull just the first weather report from the list       
mars_weather = weather_list[0]

print(mars_weather)
# add this data to mars_data dict
#mars_data["mars_weather"] = mars_weather

Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59


## Acquistion of Mars Facts to display in a table

In [7]:
# Mars Facts URL
url = "https://space-facts.com/mars/"

# Retrieve page with the requests module
html = requests.get(url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html.text, 'html.parser')

# Empty dictionary for info
mars_profile = {}

# Get info
results = soup.find('tbody').find_all('tr')

# Storing profile information
for result in results:
    key = result.find('td', 'column-1').text.split(":")[0]
    value = result.find('td', 'column-2').text
    
    mars_profile[key] = value
    
# Creating a DataFrame
profile_df = pd.DataFrame([mars_profile]).T.rename(columns = {0: "Value"})
profile_df.index.rename("Description", inplace=True)
profile_df

# Converting to html
#profile_html = "".join(profile_df.to_html().split("\n"))

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter,"6,792 km\n"
First Record,2nd millennium BC
Mass,6.42 x 10^23 kg (10.7% Earth)
Moons,2 (Phobos & Deimos)
Orbit Distance,"227,943,824 km (1.52 AU)"
Orbit Period,687 days (1.9 years)\n
Polar Diameter,"6,752 km\n"
Recorded By,Egyptian astronomers
Surface Temperature,-153 to 20 °C


In [8]:
# Converting to html
profile_html = "".join(profile_df.to_html().split("\n"))
profile_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter</th>      <td>6,792 km\\n</td>    </tr>    <tr>      <th>First Record</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Mass</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period</th>      <td>687 days (1.9 years)\\n</td>    </tr>    <tr>      <th>Polar Diameter</th>      <td>6,752 km\\n</td>    </tr>    <tr>      <th>Recorded By</th>      <td>Egyptian astronomers</td>    </tr>    <tr>      <th>Surface Temperature</th>      <td>-153 to 20 °C</td>    </tr>  </tbody></table>'

## Acquistion of Mars Hemispheres Images

In [9]:
# Mars Hemispheres URL
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Empty list of image urls
hemisphere_image_urls = []

### Valles Marineris

In [10]:
# Setting up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Valles Marineris Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
#print(soup.prettify())

# Store link
valles_link = soup.find('div', 'downloads').a['href']

# Create dictionary
valles_marineris = {
    "title": "Valles Marineris Hemisphere",
    "img_url": valles_link
}

# Appending dictionary
hemisphere_image_urls.append(valles_marineris)
print(valles_marineris)

{'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}


### Cerberus

In [11]:
# Setting up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Cerberus Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
cerberus_link = soup.find('div', 'downloads').a['href']

# Create dictionary
cerberus = {
    "title": "Cerberus Hemisphere",
    "img_url": cerberus_link
}

# Appending dictionary
hemisphere_image_urls.append(cerberus)
print(cerberus)

{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}


### Schiaparelli

In [12]:
# Setting up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
schiaparelli_link = soup.find('div', 'downloads').a['href']

# Create dictionary
schiaparelli = {
    "title": "Schiaparelli Hemisphere",
    "img_url": schiaparelli_link
}

# Appending dictionary
hemisphere_image_urls.append(schiaparelli)
print(schiaparelli)

{'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}


### Syrtis Major

In [13]:
# Setting up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

# Moving through pages
time.sleep(5)
browser.click_link_by_partial_text('Syrtis Major Hemisphere Enhanced')
time.sleep(5)

# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Store link
syrtis_link = soup.find('div', 'downloads').a['href']

# Create dictionary
syrtis_major = {
    "title": "Syrtis Major Hemisphere",
    "img_url": syrtis_link
}

# Appending dictionary
hemisphere_image_urls.append(syrtis_major)
print(syrtis_major)

{'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}


In [14]:
scraped_mars = {'news': {'news_title': news_title, 'news_p': news_p},
               'featured_image_url': featured_image_url,
                'weather': mars_weather,
                'facts_table': profile_html,
                'hemispheres': hemisphere_image_urls
               }
scraped_mars

{'news': {'news_title': '\n\nOpportunity Hunkers Down During Dust Storm\n\n',
  'news_p': "\nIt's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home. \n"},
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16192_hires.jpg',
 'weather': 'Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59',
 'facts_table': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter</th>      <td>6,792 km\\n</td>    </tr>    <tr>      <th>First Record</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Mass</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>