## Step 1 - Scraping

In [2]:
# Dependencies
import shutil

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser


### NASA Mars News

In [4]:
# URL of page to be scraped
news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Retrieve page with the requests module. The timeout keeps a stalled
# connection from hanging the kernel, and raise_for_status() stops the cell on
# an HTTP error instead of letting an error page be parsed as if it were news.
response = requests.get(news_url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
news_soup = bs(response.text, 'html.parser')

# Examine the results, then determine element that contains sought info
# print(news_soup.prettify())


In [5]:
# Take the first <div class="content_title"> in the parsed page and keep its
# text with the surrounding whitespace removed.
title_div = news_soup.find('div', attrs={'class': 'content_title'})
news_title = title_div.text.strip()
news_title

"NASA's InSight Places First Instrument on Mars"

In [6]:
# Take the first <div class="rollover_description_inner"> in the parsed page
# and keep its text with the surrounding whitespace removed.
teaser_div = news_soup.find('div', attrs={'class': 'rollover_description_inner'})
news_p = teaser_div.text.strip()
news_p

'In deploying its first instrument onto the surface of Mars, the lander completes a major mission milestone.'

### JPL Mars Space Images - Featured Image

In [8]:
# Shell out to show where the chromedriver executable lives on PATH.
!which chromedriver


/usr/local/bin/chromedriver


In [7]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# directory layout; fall back to the original install location when PATH has
# no match.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

# Load the JPL Mars image gallery in the browser.
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url)

# Parse the markup as the browser currently holds it.
html = browser.html
jpl_soup = bs(html, 'html.parser')

# print(jpl_soup.prettify())

In [10]:
# The first <a class="button fancybox"> carries the image path in its
# data-fancybox-href attribute.
fancybox_link = jpl_soup.find('a', attrs={'class': 'button fancybox'})
jpl_image = fancybox_link['data-fancybox-href']
jpl_image

'/spaceimages/images/mediumsize/PIA17171_ip.jpg'

In [11]:
# Prefix the scraped image path with the JPL site origin to get a full URL.
jpl_base_url = 'https://www.jpl.nasa.gov'
featured_image_url = jpl_base_url + jpl_image
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17171_ip.jpg'

### Mars Weather

In [12]:
# Point the already-open browser at the Mars weather Twitter account.
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

# Snapshot the browser's current markup and parse it.
html = browser.html
weather_soup = bs(html, features='html.parser')

# print(weather_soup.prettify())

In [13]:
# Find the first <p> whose class attribute is exactly this string, then keep
# its text with the surrounding whitespace removed.
tweet_classes = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
tweet_paragraph = weather_soup.find('p', class_=tweet_classes)
mars_weather = tweet_paragraph.text.strip()
mars_weather

'Sol 2270 (2018-12-25), high -5C/23F, low -75C/-102F, pressure at 8.35 hPa, daylight 06:41-18:53'

### Mars Facts

In [14]:
# Page whose HTML tables are read with pandas in the following cell.
facts_url = 'https://space-facts.com/mars/'

In [15]:
# Let pandas fetch the page and parse its tables into a list of DataFrames,
# then echo the list.
tables = pd.read_html(io=facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [16]:
# Keep the first parsed table and preview its leading rows.
df = tables[0]
df.head(n=5)

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [17]:
# With no output buffer given, to_html returns the table markup as a string.
html_table = df.to_html(buf=None)
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    

In [18]:
# str.replace returns a new string, so keep the newline-free markup under its
# own name instead of only echoing it; html_table itself is left untouched.
html_table_compact = html_table.replace('\n', '')
html_table_compact

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    <

In [19]:
# Write the table markup to table.html in the working directory.
df.to_html(buf='table.html')

In [22]:
# !open table.html

### Mars Hemispheres

In [20]:
# Point the already-open browser at the article that shows the hemisphere images.
hemi_url = 'http://www.planetary.org/blogs/guest-blogs/bill-dunford/20140203-the-faces-of-mars.html'
browser.visit(hemi_url)

# Snapshot the browser's current markup and parse it.
html = browser.html
hemi_soup = bs(html, features='html.parser')

# print(hemi_soup.prettify())


In [21]:
# Collect every <img class="img840"> tag on the page and echo the matches.
hemispheres = hemi_soup.find_all('img', attrs={'class': 'img840'})
hemispheres

[<img alt="Mars: Valles Marineris Hemisphere" class="img840" src="http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_valles_marineris_enhanced_f840.jpg"/>,
 <img alt="Mars: Syrtis Major Hemisphere" class="img840" src="http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_syrtis_major_enhanced_f840.jpg"/>,
 <img alt="Mars: Cerberus Hemisphere " class="img840" src="http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_cerberus_enhanced_f840.jpg"/>,
 <img alt="Mars: Schiaparelli Hemisphere " class="img840" src="http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_schiaparelli_enhanced_f840.jpg"/>]

In [28]:
# Build one record per hemisphere image tag. Some alt texts on the page end
# with a stray space, so the title is stripped before it is stored.
hemisphere_image_urls = [
    {"title": img_tag['alt'].strip(), "img_url": img_tag['src']}
    for img_tag in hemispheres
]


In [29]:
# Echo the collected records: one dict per image with 'title' and 'img_url' keys.
hemisphere_image_urls

[{'title': 'Mars: Valles Marineris Hemisphere',
  'img_url': 'http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_valles_marineris_enhanced_f840.jpg'},
 {'title': 'Mars: Syrtis Major Hemisphere',
  'img_url': 'http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_syrtis_major_enhanced_f840.jpg'},
 {'title': 'Mars: Cerberus Hemisphere ',
  'img_url': 'http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_cerberus_enhanced_f840.jpg'},
 {'title': 'Mars: Schiaparelli Hemisphere ',
  'img_url': 'http://planetary.s3.amazonaws.com/assets/images/4-mars/2014/20140202_schiaparelli_enhanced_f840.jpg'}]