In [96]:
# Dependencies
import os
import pandas as pd
from bs4 import BeautifulSoup as bs
import urllib.request
import requests
import pymongo
from selenium import webdriver
from splinter import Browser

In [97]:
# Connect to the MongoDB server listening on localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Database handle that the scraping cells read their collections from
db = client['mars_db']


# NASA Mars News Scrape

In [133]:
# Download the NASA Mars news page. The `with` block closes the HTTP response
# as soon as the body has been read instead of leaving the connection open.
with urllib.request.urlopen('https://mars.nasa.gov/news/') as page:
    html = page.read()

# Parse the raw bytes so the following cells can search the markup
soup = bs(html, 'html.parser')

# Collection that receives the scraped news documents
marsnews_db = db.marsnews


In [134]:
# Find every headline container and store each headline (whitespace-trimmed)
# as its own {'title': ...} document
news_title = soup.find_all('div', class_='content_title')
for title in news_title:
    marsnews_db.insert_one({'title': title.text.strip()})

In [135]:
# Find every teaser container and store each teaser (whitespace-trimmed)
# as its own {'text': ...} document in the same collection as the headlines
news_p = soup.find_all('div', class_='rollover_description_inner')
for p in news_p:
    marsnews_db.insert_one({'text': p.text.strip()})

# JPL Mars Space Images - Featured Image

In [136]:
# Collection that receives the featured-image document
jplimg_db = db['jplimg']

# Start a visible (non-headless) Chrome session using the chromedriver binary
# that sits next to the notebook
from splinter import Browser
executable_path = {"executable_path": "./chromedriver"}
browser = Browser("chrome", headless=False, **executable_path)

In [137]:
# Load the JPL space-images listing (query string selects category=Mars)
# in the Splinter-controlled browser
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [138]:
# XPath of the <img> element nested under the #fancybox-lock container
xpath = '//*[@id="fancybox-lock"]/div/div[1]/img'

# Snapshot and parse the listing page as it is before the link is clicked
html = browser.html
soup = bs(html, 'html.parser')
articles = soup.find('a', class_='button fancybox')

# Click the link whose text contains "FULL"
# (presumably opens the full-size image viewer; verify against the live page)
browser.click_link_by_partial_text('FULL')

In [139]:
# Locate the image element via the XPath defined earlier, read its `src`
# attribute, and store that URL as a {'featuredImage': ...} document
results = browser.find_by_xpath(xpath)
img = results['src']
jplimg_db.insert_one({'featuredImage': img})

<pymongo.results.InsertOneResult at 0x23dcd877c08>

In [140]:
# Download the image at the URL held in `img` and save it locally as img.png
import requests
import shutil

# The context manager releases the streamed connection even if the copy fails
with requests.get(img, stream=True) as response:
    # Fail loudly instead of silently saving an HTTP error page as the image
    response.raise_for_status()
    # `response.raw` is the undecoded byte stream; have urllib3 undo any
    # gzip/deflate transfer encoding so the file holds real image bytes
    response.raw.decode_content = True
    with open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

In [141]:
# Show the file written by the download cell inline in the notebook.
# NOTE(review): 'img.png' is a local path passed through `url=`; confirm whether
# `filename=` was intended so the image bytes are embedded in the notebook.
from IPython.display import Image
Image(url='img.png')

# Mars Weather

In [152]:
# Download the Mars Weather Twitter timeline; the `with` block closes the
# HTTP response once the body has been read
with urllib.request.urlopen('https://twitter.com/marswxreport?lang=en') as page:
    html = page.read()
soup = bs(html, 'html.parser')

# Collection that receives the latest weather report
marsweather_db = db.marsweather

# The first tweet-text container on the page is taken as the latest report
tweet = soup.find("div", class_="js-tweet-text-container")
if tweet is None:
    # Without this guard a missing container surfaces as an opaque
    # AttributeError on `None.text`
    raise ValueError("No 'js-tweet-text-container' element found on the Mars weather page")

# Trim the whitespace around the tweet text before storing it; this cell
# depends only on its own `tweet`, not on variables left over from other cells
weather = tweet.text.strip()
marsweather = {'weather': weather}
marsweather_db.insert_one(marsweather)

<pymongo.results.InsertOneResult at 0x23dce847b88>

# Mars Facts

In [153]:
# Page whose HTML tables hold the Mars facts
url = 'https://space-facts.com/mars/'

# Collection handle for the Mars facts
marsfacts_db = db['marsfacts']

In [154]:
# Parse the HTML tables found at `url` into a list of DataFrames
# and display the first one
tables = pd.read_html(url)
tables[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [155]:
# Work with the first parsed table. This is a plain alias, not a copy, so
# in-place changes made through `df` are also visible through tables[0].
df = tables[0]

In [156]:
# Replace both column headers with empty strings (the assignment requires the
# frame to have exactly two columns), then preview the first rows
df.columns = ['','']
df.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [None]:
# Index the facts by their label column.
# `set_index` returns a new frame, so the result must be bound to a name.
# The columns are selected by position, not by name, so this works whether
# the headers are still 0/1 or have been blanked to '' (duplicate names make
# a by-name lookup ambiguous). Building from tables[0] rather than from `df`
# keeps the cell idempotent when it is re-run.
# NOTE(review): assumes the first column (fact names) is the intended index;
# confirm, since the previous code referenced label 1.
facts = tables[0]
df = facts.iloc[:, 1:].copy()
df.index = facts.iloc[:, 0]
df.head()

# Mars Hemispheres

In [89]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Start a visible (non-headless) Chrome session with the local chromedriver
# and load the search results page
from splinter import Browser
executable_path = {"executable_path": "./chromedriver"}
browser = Browser("chrome", headless=False, **executable_path)
browser.visit(url)

In [90]:
# Parse the page currently loaded in the browser
html = browser.html
soup = bs(html, 'html.parser')
# Look up the results container.
# NOTE(review): `hemispheres` is reassigned to a hard-coded list in the next
# cell, so this lookup result is not used afterwards.
hemispheres = soup.find('div', class_ = 'collapsible results')

In [91]:
# Empty lists (nothing in the visible cells appends to them)
title, img_url = [], []

# Partial link texts used to open each hemisphere's detail page
hemispheres = ["Cerberus", "Schiaparelli", "Syrtis", "Valles"]

# Collection that stores one document per hemisphere
hemisphere_db = db['hemispheres']

In [93]:
for hemisphere in hemispheres:
    # Open the detail page via the link whose text contains the hemisphere name
    browser.click_link_by_partial_text(hemisphere)

    # Re-parse the page now shown in the browser
    html = browser.html
    soup = bs(html, 'html.parser')

    # href of the first link in the first list item of the "downloads" block
    downloads = soup.find("div", class_="downloads")
    img = downloads.ul.li.a["href"]

    # Text of the first <h2> in the body is used as the title
    text = soup.body.find('h2').text

    hemisphere_db.insert_one({'title': text, 'img_url': img})

    # Return to the results list before handling the next hemisphere
    browser.click_link_by_partial_text('Back')





In [94]:
# Echo the collection handle to check which database and collection it targets
hemisphere_db

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'hemispheres_db'), 'hemispheres')

In [95]:
# Print every document currently stored in the hemispheres collection
for listing in db.hemispheres.find():
    print(listing)

{'_id': ObjectId('5a6a8284a10d9249f0121b1d'), 'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'_id': ObjectId('5a6a8288a10d9249f0121b1e'), 'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'_id': ObjectId('5a6a828da10d9249f0121b1f'), 'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}
{'_id': ObjectId('5a6a8291a10d9249f0121b20'), 'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
