In [7]:
from splinter import Browser
from bs4 import BeautifulSoup
from pprint import pprint
import pandas as pd
import pymongo

In [8]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [9]:
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [10]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

news_titles = soup.find_all('div', class_ = "content_title")
news_p = soup.find_all('div', class_ = "article_teaser_body" )

mars_new_instance = {'news-title': news_titles[0].text, 'news-summary': news_p[0].text}

In [11]:
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

featured_image_url = soup.find('a', class_= "button fancybox")['data-fancybox-href']

In [12]:
featured_image_url = 'https://www.jpl.nasa.gov'+ featured_image_url
mars_new_instance['featured-image-url'] = featured_image_url

In [13]:
tw_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tw_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [14]:
latest_tweet = soup.find_all('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
mars_new_instance['current-weather'] = latest_tweet[0].text

In [15]:
sf_url = 'https://space-facts.com/mars/'
browser.visit(sf_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [16]:
mars_attr = soup.find_all('td', class_="column-1")
mars_facts = soup.find_all('td', class_="column-2")
for attr, fact in zip(mars_attr, mars_facts):
    print(attr.text, fact.text)

Equatorial Diameter: 6,792 km

Polar Diameter: 6,752 km

Mass: 6.42 x 10^23 kg (10.7% Earth)
Moons: 2 (Phobos & Deimos)
Orbit Distance: 227,943,824 km (1.52 AU)
Orbit Period: 687 days (1.9 years)

Surface Temperature:  -153 to 20 °C
First Record: 2nd millennium BC
Recorded By: Egyptian astronomers


In [17]:
mars_facts_dict = {}
for attr, fact in zip(mars_attr, mars_facts):
    mars_facts_dict[attr.text.strip().split(':')[0]] = fact.text.strip()

mars_facts_dict

{'Equatorial Diameter': '6,792 km',
 'Polar Diameter': '6,752 km',
 'Mass': '6.42 x 10^23 kg (10.7% Earth)',
 'Moons': '2 (Phobos & Deimos)',
 'Orbit Distance': '227,943,824 km (1.52 AU)',
 'Orbit Period': '687 days (1.9 years)',
 'Surface Temperature': '-153 to 20 °C',
 'First Record': '2nd millennium BC',
 'Recorded By': 'Egyptian astronomers'}

In [18]:
facts_df = pd.DataFrame.from_dict(mars_facts_dict, orient = "index")
facts_df

Unnamed: 0,0
Equatorial Diameter,"6,792 km"
Polar Diameter,"6,752 km"
Mass,6.42 x 10^23 kg (10.7% Earth)
Moons,2 (Phobos & Deimos)
Orbit Distance,"227,943,824 km (1.52 AU)"
Orbit Period,687 days (1.9 years)
Surface Temperature,-153 to 20 °C
First Record,2nd millennium BC
Recorded By,Egyptian astronomers


In [19]:
facts_df.to_html(header = False)

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [20]:
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [21]:
hemi_links = soup.find_all('a', class_= "itemLink product-item")
links = []
for a in hemi_links:
    hemi_url = 'https://astrogeology.usgs.gov'+a['href']
    if hemi_url not in links:
        links.append(hemi_url)
print(links)

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']


In [22]:
import time
hemisphere_image_urls = []
for link in links:
    browser.visit(link)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    img_dict = {}
    img_dict['title'] = soup.find('h2', class_ = "title").text.strip('Enhanced')
    img_dict['img_url'] = soup.find('a', target = "_blank")['href']
    hemisphere_image_urls.append(img_dict)
    time.sleep(3)

In [23]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere ',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [24]:
# Create connection variable
conn = 'mongodb://localhost:27017'

# Pass connection to the pymongo instance.
client = pymongo.MongoClient(conn)

# Connect to a database. Will create one if not already available.
db = client.mars_db

In [28]:
# Drops collection if available to remove duplicates
db.request_instances.drop()

# Creates a collection in the database and inserts two documents
db.request_instances.insert_one(mars_new_instance)

<pymongo.results.InsertOneResult at 0x26a93d1bc48>

In [30]:
# Drops collection if available to remove duplicates
db.facts.drop()

# Creates a collection in the database and inserts two documents
db.facts.insert_one(mars_facts_dict)

<pymongo.results.InsertOneResult at 0x26a93d958c8>

In [31]:
# Drops collection if available to remove duplicates
db.images.drop()

# Creates a collection in the database and inserts two documents
db.images.insert_many(hemisphere_image_urls)

<pymongo.results.InsertManyResult at 0x26a93d0a5c8>