In [1]:
# Dependencies
import os
from bs4 import BeautifulSoup as bs
import requests
import pymongo
import time
from splinter import Browser
import pandas as pd

## Mars News 

In [2]:
# URL of the NASA Mars news page to be scraped
url_news = 'https://mars.nasa.gov/news/'

In [3]:
# Launch a Chrome session through splinter and open the news page.
# NOTE(review): the following cells parse the page fetched with requests, not
# browser.html -- confirm this browser session is actually needed here.
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', **executable_path)
browser.visit(url_news)

In [4]:
# Retrieve page with the requests module.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the news listing.
response = requests.get(url_news, timeout=30)
response.raise_for_status()
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(response.text, 'lxml')

In [5]:
# Latest News Title from NASA Mars News Site
titles = soup.find_all('div', class_='content_title')
# The first match is the most recent article. The raw text is wrapped in
# newlines by the page markup, so strip it before storing/printing.
news_title = titles[0].text.strip()
print(news_title)



Opportunity Hunkers Down During Dust Storm




In [6]:
# Latest News Paragraph Text from NASA Mars News Site
paragraphs = soup.find_all('div', class_="rollover_description_inner")
# The first match belongs to the most recent article. The raw text carries a
# leading newline and trailing space from the markup, so strip it.
news_p = paragraphs[0].text.strip()
print(news_p)


It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home. 



In [None]:
# Close the Chrome session opened for the Mars News section
browser.quit()

## Mars Featured Image

In [7]:
# JPL Mars Space Images page that hosts the featured image
url_images = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Start a Chrome session via splinter and load that page
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', **executable_path)
browser.visit(url_images)

In [8]:
# Give the page a few seconds to load before interacting with it
time.sleep(5)

# Find and click the full image button
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [9]:
# Pause again after the previous click (presumably so the new content can render)
time.sleep(5)

# Find the more info button and click that
more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()

In [10]:
# Using BeautifulSoup create an object and parse with 'html.parser'
html = browser.html
img_soup = bs(html, 'html.parser')

In [11]:
# Locate the <figure class="lede"> element, then read the src of the <img> inside it
lede_figure = img_soup.find('figure', class_='lede')
img_url_rel = lede_figure.find('img')['src']
img_url_rel

'/spaceimages/images/largesize/PIA16884_hires.jpg'

In [12]:
# Prefix the site root to turn the relative image path into an absolute URL
JPL_link = 'https://www.jpl.nasa.gov'
featured_image_url = ''.join([JPL_link, img_url_rel])
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16884_hires.jpg'

In [21]:
# Close the Chrome session opened for the featured-image section
browser.quit()

## Mars Weather

In [13]:
# Fetch the Mars weather report Twitter page.
# The timeout prevents an indefinite hang on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
url_twitter = "https://twitter.com/marswxreport?lang=en"
response = requests.get(url_twitter, timeout=30)
response.raise_for_status()
soup = bs(response.text, 'html.parser')

# The first tweet-text container on the page (find returns None if there is none)
result = soup.find('div', class_="js-tweet-text-container")

print(result)

<div class="js-tweet-text-container">
<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Sol 2169 (2018-09-12), high -10C/14F, low -70C/-93F, pressure at 8.82 hPa, daylight 05:41-17:58</p>
</div>


In [14]:
# Pull the text out of the first <p> inside the tweet container
weather = result.find('p').get_text()
print(weather)

Sol 2169 (2018-09-12), high -10C/14F, low -70C/-93F, pressure at 8.82 hPa, daylight 05:41-17:58


## Mars Facts

In [15]:
url_facts = "http://space-facts.com/mars/"

# read_html returns a list of DataFrames, one per table found; keep the first
tables = pd.read_html(url_facts)
facts = tables[0]


print(facts)

                      0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.52 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                  -153 to 20 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [16]:
# Give the two unnamed columns (0 and 1) descriptive labels
facts.columns=['description', 'value']
# NOTE(review): set_index is disabled, so the integer row index is kept -- confirm this is intended
#facts.set_index('description', inplace=True)
facts.index.name = None

print(facts)

            description                          value
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.52 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                  -153 to 20 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [17]:
# Convert the dataframe to an HTML table string
mars_facts_html = facts.to_html()
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>description</th>\n      <th>value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium

## Mars Hemispheres

In [18]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
url_USGS = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Start a Chrome session via splinter and load the results page
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', **executable_path)
browser.visit(url_USGS)

# Parse the page source as currently rendered in the browser
html = browser.html
soup = bs(html, 'html.parser')

In [19]:
url_base = "https://astrogeology.usgs.gov"
result = soup.find_all('div', class_="item")

# Relative link to each hemisphere's detail page: the first <a href=...> in
# every result item. Items without a link are skipped rather than crashing
# with "'NoneType' object is not subscriptable".
url_list = [
    anchor['href']
    for anchor in (item.find('a', href=True) for item in result)
    if anchor is not None
]

print(url_list)


# Create an empty list to hold dictionaries of hemisphere title with the image url string
hemisphere_image_urls = []

# Trailing word dropped from each page title
enhanced_suffix = ' Enhanced'

for detail_path in url_list:
    detail_url = url_base + detail_path

    browser.visit(detail_url)

    # Sleep script to ensure the page fully loads
    time.sleep(5)

    # Parsed into its own name so the search-results `soup` is not overwritten
    detail_soup = bs(browser.html, 'html.parser')

    image_tag = detail_soup.find('img', class_="wide-image")
    title_tag = detail_soup.find('h2', class_='title')
    if image_tag is None or title_tag is None:
        # Fail with the offending URL instead of an opaque TypeError/AttributeError
        raise ValueError("Missing wide-image or title on " + detail_url)

    # Grab image url
    image = url_base + image_tag["src"]

    # Grab page title and remove "Enhanced" from string; only that exact
    # trailing word is removed, so a title without it is kept intact
    title = title_tag.text.strip()
    if title.endswith(enhanced_suffix):
        title = title[:-len(enhanced_suffix)]

    hemisphere_image_urls.append({"Title": title, "Image URL": image})

    # NOTE(review): presumably a politeness delay between page loads -- confirm it is needed
    time.sleep(10)

print(hemisphere_image_urls)

['/search/map/Mars/Viking/cerberus_enhanced', '/search/map/Mars/Viking/schiaparelli_enhanced', '/search/map/Mars/Viking/syrtis_major_enhanced', '/search/map/Mars/Viking/valles_marineris_enhanced']
[{'Title': 'Cerberus Hemisphere', 'Image URL': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'Title': 'Schiaparelli Hemisphere', 'Image URL': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'Title': 'Syrtis Major Hemisphere', 'Image URL': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'Title': 'Valles Marineris Hemisphere', 'Image URL': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [20]:
# Close the Chrome session opened for the hemispheres section
browser.quit()