# Mars Analysis

## Dependencies

In [53]:
#Dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pymongo

import tweepy
from config import api_key as consumer_key
from config import api_secret as consumer_secret
from config import access_token as access_token
from config import access_secret as access_token_secret

import pandas as pd
import shutil

from flask import Flask, render_template, render_template_string

##  Finding the first article on Nasa.gov

In [54]:
#Initial URL
mars_url = "https://mars.nasa.gov/news/"

In [55]:
#Pulling down URL for Mars News site
response = requests.get(mars_url)

In [56]:
#Creating a beautiful soup object for the mars news site
soup = bs(response.text, 'lxml')

In [57]:
#Checking the contents of the beautiful soup object
print(soup.body.prettify())

<body id="news">
 <div id="main_container">
  <div id="site_body">
   <div class="site_header_area">
    <header class="site_header">
     <div class="brand_area">
      <div class="brand1">
       <a class="nasa_logo" href="http://www.nasa.gov" target="_blank" title="visit nasa.gov">
        NASA
       </a>
      </div>
      <div class="brand2">
       <a class="top_logo" href="https://science.nasa.gov/" target="_blank" title="Explore NASA Science">
        NASA Science
       </a>
       <a class="sub_logo" href="/mars-exploration/#" title="Mars">
        Mars Exploration Program
       </a>
      </div>
      <img alt="" class="print_only print_logo" src="/assets/logo_nasa_trio_black@2x.png"/>
     </div>
     <a class="visuallyhidden focusable" href="#page">
      Skip Navigation
     </a>
     <div class="right_header_container">
      <a class="menu_button" href="javascript:void(0);" id="menu_button">
       <span class="menu_icon">
        menu
       </span>
      </a>
      

In [63]:
#Grabbing necessary content
#Please note - this grabs the first item in the carousel at the bottom
#Beautiful soup does not seem able to scrape some content rendered dynamically in the webpage in this instance
news_title = soup.find('div', class_= 'content_title').text.strip()
news_body = soup.find('div', class_ = 'image_and_description_container').text.strip()
news_href = mars_url[:-2] + soup.find('div', class_= 'content_title').a['href']

In [64]:
news_href

'https://mars.nasa.gov/new/news/8340/demonstration-proves-nuclear-fission-system-can-provide-space-exploration-power/'

## Finding the featured image

In [7]:
#defining the path for the next web scrape
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [8]:
#reading the featured URL
response = requests.get(jpl_url)

In [9]:
#instantiating a beautiful soup object
#NOTE - I know this is not explicitly required, but I wanted the opportunity to practice and to identify the target for splinter
soup = bs(response.text, 'lxml')

In [10]:
#checking the structure of the soup object
print(soup.prettify())

<!DOCTYPE html>
<!--[if IE 9]> <html class="no-js ie ie9" lang="en"> <![endif]-->
<!--[if IE 8]> <html class="no-js ie ie8" lang="en"> <![endif]-->
<html>
 <!-- START HEADER: "DEFAULT" -->
 <head>
  <meta charset="utf-8"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
  <title>
   Space Images
  </title>
  <link href="/assets/stylesheets/manifest.css" media="all" rel="stylesheet" type="text/css"/>
  <link href="/assets/stylesheets/print.css" media="print" rel="stylesheet" type="text/css"/>
  <script src="/assets/javascripts/public_manifest.js" type="text/javascript">
  </script>
  <script src="/assets/javascripts/vendor/jquery.fancybox.js" type="text/javascript">
  </script>
  <script src="/assets/javascripts/vendor/jquery.fancybox-thumbs.js" type="text/javascript">
  </script>
 </he

In [11]:
#Instantiating a splinter browser object
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [12]:
#Use the browser to visit the specified URL
browser.visit(jpl_url)

In [13]:
#follow the link for the button with "Full Image" text
browser.click_link_by_partial_text('FULL IMAGE')

In [14]:
#find the featured image url by looping through the img tags and pulling out the one only with teh class of "fancybox-image"
results = browser.find_by_tag('img')
for result in results:
    if result['class'] == "fancybox-image":
        featured_image_url = result['src']
        
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA01320_ip.jpg


## Find Twitter Data

In [15]:
#Tweepy OAuth dependencies
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser = tweepy.parsers.JSONParser())

In [16]:
#Defining the target user
target_user = "marswxreport"

In [17]:
#gettig all the tweets from the target user
public_tweets = api.user_timeline(target_user)

In [18]:
#finding the last tweet & saving as a variable called "mars_weather"
mars_weather = public_tweets[0]['text']
print(mars_weather)

Sol 2062 (May 25, 2018), Sunny, high 2C/35F, low -72C/-97F, pressure at 7.45 hPa, daylight 05:19-17:20


## PULL DATAFRAME FROM MARS SITE & READ BACK TO HTML

In [19]:
# creating an object to hold the url data
mars_facts_url = "http://space-facts.com/mars/"

In [20]:
# Reading the url into a dataframe
mars_data = pd.read_html(mars_facts_url)

In [21]:
# taking the dataframe out of the list
mars_df = mars_data[0]

In [22]:
#renaming the columns
mars_df = mars_df.rename(columns = {0:'Metrics', 1: 'Parameters'})

In [23]:
# checking the data
mars_df.set_index('Metrics', inplace = True)
mars_df

Unnamed: 0_level_0,Parameters
Metrics,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
#convert mars dataframe to html
mars_data_html = mars_df.to_html()

In [25]:
#Checking mars data html
print(mars_data_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Parameters</th>
    </tr>
    <tr>
      <th>Metrics</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Astrogeology

In [26]:
#astrogeology url
astro_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [27]:
#Reading the url
response = requests.get(astro_url)

In [28]:
# turning the htrml response to a Beautiful Soup object
soup = bs(response.text, 'lxml')

In [29]:
#checking the soup object
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js" type="text/javascript">
  </script>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic

In [30]:
#Instantiating a splinter browser object
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [31]:
#Use the browser to visit the specified URL
browser.visit(astro_url)

In [32]:
#find the links to the thumbnails
html = browser.html
soup = bs(html, 'lxml')
base_path = 'https://astrogeology.usgs.gov'
thumb_urls = []
img_url = soup.find_all("img", class_= "thumb")
for url in img_url:
    thumb_urls.append(('https://astrogeology.usgs.gov' + url['src']))
thumb_urls

['https://astrogeology.usgs.gov/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png',
 'https://astrogeology.usgs.gov/cache/images/7677c0a006b83871b5a2f66985ab5857_schiaparelli_enhanced.tif_thumb.png',
 'https://astrogeology.usgs.gov/cache/images/aae41197e40d6d4f3ea557f8cfe51d15_syrtis_major_enhanced.tif_thumb.png',
 'https://astrogeology.usgs.gov/cache/images/04085d99ec3713883a9a57f42be9c725_valles_marineris_enhanced.tif_thumb.png']

In [34]:
i = 1
for url in thumb_urls:
    response = requests.get(url, stream = True)
    with open(f"static\Thumb{i}.png", 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    i += 1

In [35]:
#go to the specified path, find the links to the hi-res images, and put in a list.
base_path = 'https://astrogeology.usgs.gov'
image_links = []
results = soup.find_all('div', class_='item')
for result in results:
    tag = base_path + result.a['href']
    image_links.append(tag)
print(image_links)

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']


In [36]:
#visit each url and grab the required information
hemisphere_image_urls = []
for link in image_links:
    hemisphere = link.split("/")[-1][:-9].capitalize()
    response = requests.get(link)
    soup = bs(response.text, 'lxml')
    items = soup.find_all('a', target = "_blank")
    for item in items:
        small_dict = {}
        if item.text == "Original":
            small_dict['img_url'] = small_dict.get('img_url', item['href'])
            small_dict["Hemisphere"] = small_dict.get("Hemisphere", hemisphere)
            hemisphere_image_urls.append(small_dict)

In [37]:
hemisphere_image_urls

[{'Hemisphere': 'Cerberus',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'Hemisphere': 'Schiaparelli',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'Hemisphere': 'Syrtis_major',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'Hemisphere': 'Valles_marineris',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]

# Create dictionary from all content in notebook

In [38]:
mars_dict = {
    "News_Site": mars_url,
    "News_Article": {
        "Article_Title": news_title,
        "Article_Body": news_body,
        "Article_Link": news_href
        },
    "Image_Site": jpl_url,
    "Featured_Image": featured_image_url,
    "Mars_Weather": {
        "Tweeter": target_user,
        "Weather": mars_weather
        },
    "Mars_Facts": {
        "Source": mars_facts_url,
        "HTML_Table": mars_data_html,
        },
    "Astrogeology": {
        "Source": astro_url,
        "Images": hemisphere_image_urls
    }
}

In [39]:
print(mars_dict['Mars_Facts']['HTML_Table'])

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Parameters</th>
    </tr>
    <tr>
      <th>Metrics</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# PyMongo Test

In [40]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [41]:
# Define database and collection
db = client.mars_db
collection = db.items

In [42]:
mars_dict

{'Astrogeology': {'Images': [{'Hemisphere': 'Cerberus',
    'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
   {'Hemisphere': 'Schiaparelli',
    'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
   {'Hemisphere': 'Syrtis_major',
    'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
   {'Hemisphere': 'Valles_marineris',
    'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}],
  'Source': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'},
 'Featured_Image': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA01320_ip.jpg',
 'Image_Site': 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars',
 'Mars_Facts': {'HTML_Table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <

In [43]:
collection.update_one({}, {"$set":mars_dict}, upsert = True)

<pymongo.results.UpdateResult at 0x28ace5e7798>

In [44]:
collection.find()

<pymongo.cursor.Cursor at 0x28ace230b38>

In [45]:
results = list(collection.find())

In [46]:
for result in results:
    print(result)

{'_id': ObjectId('5b08d3ce7a2bf2b4bf84197d'), 'Astrogeology': {'Source': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars', 'Images': [{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif', 'Hemisphere': 'Cerberus'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif', 'Hemisphere': 'Schiaparelli'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif', 'Hemisphere': 'Syrtis_major'}, {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif', 'Hemisphere': 'Valles_marineris'}]}, 'Image_Site': 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars', 'Mars_Facts': {'Source': 'http://space-facts.com/mars/', 'HTML_Table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Parameters</th>\n    </tr>\n    

In [47]:
results[0]['Featured_Image']

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA01320_ip.jpg'

In [48]:
results[0]['Astrogeology']['Images'][0]['img_url']

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'

In [49]:
results[0]['Mars_Facts']['HTML_Table']

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Parameters</th>\n    </tr>\n    <tr>\n      <th>Metrics</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr