In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser
import pandas as pd
import time

In [34]:
# set the chromedriver path
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", **executable_path, headless=False)

In [3]:
# Setup connection to mongodb
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

# Select database and collection to use
db = client.mars_db
mars_info = db.mars_info

In [4]:
mars_info = []

### Mars News

In [5]:
# URL of page to be scraped
mars_news_url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
mars_news_response = requests.get(mars_news_url)
# Create BeautifulSoup object; parse with 'lxml'
mars_news_html = BeautifulSoup(mars_news_response.text, 'lxml')

In [6]:
# Retrieve the parent divs for all articles
results = mars_news_html.find_all('div', class_='slide')

In [7]:
# Loop through results to retrieve article title, header, and timestamp of article
for result in results:
    news_t = result.find('div', class_='content_title').find('a').text.strip()

    news_p = result.find('div', class_='rollover_description_inner').text.strip()

    mars_info.append({'title': news_t, 'news': news_p})

In [8]:
db.mars_collection.insert_many(mars_info)

#print(mars_news)

<pymongo.results.InsertManyResult at 0x1f8f2d36948>

### JPL Mars Space Images - Featured Image

In [10]:
# Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
jpl_base_url = 'https://www.jpl.nasa.gov'
mars_images_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(mars_images_url)

time.sleep(5)

In [11]:
xpath = '//footer//a[@id="full_image"]'

In [12]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image
results = browser.find_by_xpath(xpath)
mars_feat_image = results[0]
mars_feat_image.click()

In [13]:
# Scrape the browser into soup and use soup to find the full resolution image of mars
#assign the url string to a variable called `featured_image_url`.
html = browser.html
jpl_soup = BeautifulSoup(html, 'html.parser')
featured_image_url = jpl_soup.find("img", class_="fancybox-image")["src"]

featured_image_url = jpl_base_url + featured_image_url

In [14]:
db.mars_collection.insert_one({"feat_img_url":featured_image_url})

<pymongo.results.InsertOneResult at 0x1f8f2e4e488>

### Mars Weather

In [15]:
#mars weather twitter feed
mars_weather_twitter_url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module
mars_weather_response = requests.get(mars_weather_twitter_url)

# Create BeautifulSoup object; 
mars_weather_html = BeautifulSoup(mars_weather_response.text, 'html.parser')

In [16]:
timeline = mars_weather_html.select('#timeline li.stream-item')

mars_weather = timeline[0].select('p.tweet-text')[0].get_text()

#mars_weather

In [17]:
db.mars_collection.insert_one({"weather_tweet":mars_weather})

<pymongo.results.InsertOneResult at 0x1f8f2e583c8>

### Mars Facts

In [18]:
# Visit the Mars Facts webpage [here](https://space-facts.com/mars/) 
mars_facts_url = 'https://space-facts.com/mars/'


In [19]:
# use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
tables = pd.read_html(mars_facts_url)
tables

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [20]:
#grab the second table
mars_facts_df = tables[1]

#transpose the table
#mars_facts_df = mars_facts_df.T

#use the first row as the column names
#mars_facts_df = mars_facts_df.rename(columns=mars_facts_df.iloc[0]).drop(mars_facts_df.index[0])

#cleanup column names
#mars_facts_df.columns = mars_facts_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace(':', '')

db.mars_collection.insert_one({"mars_facts":mars_facts_df.to_html(header=False, classes='table')})

<pymongo.results.InsertOneResult at 0x1f8f444da08>

### Mars Hemispheres

#Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's hemispheres.

#You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

#Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

#Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [56]:
base_usgs_url = 'https://astrogeology.usgs.gov'
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)

time.sleep(5)

In [37]:
html = browser.html
usgs_html = BeautifulSoup(html, 'html.parser')

#featured_image_url = jpl_soup.find("img", class_="fancybox-image")["src"]
#featured_image_url = jpl_base_url + featured_image_url

In [62]:
hemi_image_urls = usgs_html.find_all('div', class_='item')

In [63]:
hemi_image_urls

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiapa

In [57]:
for i in hemi_image_urls:
    hemi_img_url = base_usgs_url + i['src']
    print(hemi_img_url)

https://astrogeology.usgs.gov/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png
https://astrogeology.usgs.gov/cache/images/7677c0a006b83871b5a2f66985ab5857_schiaparelli_enhanced.tif_thumb.png
https://astrogeology.usgs.gov/cache/images/aae41197e40d6d4f3ea557f8cfe51d15_syrtis_major_enhanced.tif_thumb.png
https://astrogeology.usgs.gov/cache/images/04085d99ec3713883a9a57f42be9c725_valles_marineris_enhanced.tif_thumb.png
