<b>Import Dependencies

In [1]:
from bs4 import BeautifulSoup
import requests
import pymongo

from splinter import Browser

import pandas as pd

<h1>NASA Mars News

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.m2mars_db
collection = db.mars_news

In [4]:
# URL of page to be scraped
news_url = 'https://mars.nasa.gov/news/'
# Retrieve page with the requests module
response = requests.get(news_url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
title = soup.find('div', class_='content_title').find('a').text
news_title = title.replace("\n","")


In [6]:
paragraph = soup.find('div', class_='rollover_description_inner').text
news_paragraph = paragraph.replace("\n","")


<h1>JPL Mars Space Images - Featured Image

In [7]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)

In [9]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

browser.click_link_by_partial_text('FULL IMAGE')

In [11]:
browser.find_by_css('a.fancybox-expand').first.click()

In [12]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [13]:
full_image = soup.find('div', class_='fancybox-inner').find('img')['src']

In [14]:
featured_image_url = f'https://www.jpl.nasa.gov{full_image}'


<h1>Mars Weather

In [15]:
# URL of page to be scraped
weather_url = 'https://twitter.com/marswxreport?lang=en'
# Retrieve page with the requests module
response = requests.get(weather_url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [16]:
tweets = soup.find_all('div', class_="js-tweet-text-container")

In [17]:
notweather = []
weathertweet = []

for tweet in tweets:
    weather_tweet = tweet.find('p', class_="tweet-text").text
    check_tweet = weather_tweet.startswith('InSight')
    if check_tweet is False:
        notweather.append(weather_tweet)
    elif check_tweet is True:
        weathertweet.append(weather_tweet)
        break
first_tweet = weathertweet[0]

In [18]:
mars_weather = first_tweet.replace("\n"," ").replace("InSight sol","Sol")


<h1>Mars Facts

In [19]:
facts_url = 'https://space-facts.com/mars/'

In [20]:
tables = pd.read_html(facts_url)
# tables

In [21]:
facts_table = tables[1]
# facts_table

In [22]:
fact_df = facts_table.rename(columns={0: "fact_type", 1: "information"})


In [23]:
fact_dict = {}
for x in range (0,9):
    into_dict = {fact_df['fact_type'][x]:fact_df['information'][x]}
    fact_dict.update(into_dict)


In [24]:
html_table = fact_df.to_html()


In [25]:
html_table.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>fact_type</th>      <th>information</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astr

In [None]:
# fact_df.to_html('mars_facts.html')

In [None]:
# !open mars_facts.html

<h1>Mars Hemispheres

In [26]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [27]:
hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hem_url)

In [28]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [29]:
hemispheres = []

desc = soup.find_all('div', class_="description")

for x in desc:
    titles = x.find('a', class_="product-item").text
    hem_title = titles.replace(' Enhanced','')
    hemispheres.append(hem_title)
    


In [30]:
hem_images = []

for hemisphere in hemispheres:
    browser.visit(hem_url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    browser.click_link_by_partial_text(hemisphere)
    
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    hem_img = soup.find('div', class_="downloads").find('a')['href']
    hem_images.append(hem_img)
    


In [31]:
hemisphere_image_urls = []

for x in range(0,4):
    dictn = {"title":hemispheres[x],"img_url":hem_images[x]}
    hemisphere_image_urls.append(dictn)



<h1>Scrapes --> dictionary

In [32]:
mars_combined = {"news": {"title" : news_title, "paragraph": news_paragraph},
                "featured_img": featured_image_url,
                "current_weather": mars_weather,
                "facts": fact_dict,
                "hemispheres": hemisphere_image_urls}

In [33]:
mars_combined

{'news': {'title': 'NASA Invites Students to Name Mars 2020 Rover',
  'paragraph': "Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover."},
 'featured_img': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18429_ip.jpg',
 'current_weather': 'Sol 265 (2019-08-25) low -99.4ºC (-146.9ºF) high -26.3ºC (-15.3ºF) winds from the SSE at 5.3 m/s (12.0 mph) gusting to 16.1 m/s (35.9 mph) pressure at 7.50 hPapic.twitter.com/9YLawm67zS',
 'facts': {'Equatorial Diameter:': '6,792 km',
  'Polar Diameter:': '6,752 km',
  'Mass:': '6.39 × 10^23 kg (0.11 Earths)',
  'Moons:': '2 (Phobos & Deimos)',
  'Orbit Distance:': '227,943,824 km (1.38 AU)',
  'Orbit Period:': '687 days (1.9 years)',
  'Surface Temperature:': '-87 to -5 °C',
  'First Record:': '2nd millennium BC',
  'Recorded By:': 'Egyptian astronomers'},
 'hemispheres': [{'title': 'Cerberus Hemisphere',
   'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Vik