# Mission to Mars

## Step 1 - Scraping

In [1]:
# Dependencies
import pandas as pd
import requests
import pymongo
import os
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup
from flask import Flask, render_template
from datetime import datetime

### NASA Mars News

In [2]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [3]:
# results are returned news_title and news_p 
news_title = soup.find('div', class_="content_title")
news_p = soup.find('div', class_="rollover_description_inner")
news_title = news_title.a.text.strip()
news_p = news_p.text.strip()
print("news_title = " + news_title)
print('\n-----------------\n')
print("news_p = " + news_p)

news_title = Opportunity Hunkers Down During Dust Storm

-----------------

news_p = It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home.


### JPL Mars Space Images

In [18]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [19]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'lxml')

In [20]:
#browser.click_link_by_partial_href('/spaceimages/images/mediumsize/PIA17932_ip.jpg')
browser.click_link_by_id('full_image')

In [21]:
html = browser.html
soup = BeautifulSoup(html, 'lxml')
featured_image = soup.find('img', class_='fancybox-image')
featured_image_url = 'https://www.jpl.nasa.gov'+featured_image['src']
browser.quit()
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18322_ip.jpg


### Mars Weather

In [22]:
# Retrieve page with the requests module
url='https://twitter.com/marswxreport?lang=en'
response=requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup=BeautifulSoup(response.text, 'lxml')
#find all tweets on the page
mars_tweets=soup.find_all('p', class_='js-tweet-text')
#since this page contains other tweets related to Mars weather, try to find the first tweet with 'Sol ' substring.
for tweet in mars_tweets:
    try:
        p_tweet=tweet.text
        if p_tweet.find('Sol ')!=-1:
            mars_weather=p_tweet
            print("mars_weather = " + mars_weather)
            break
    except AttributeError as e:
        print(e)

mars_weather = Sol 2108 (2018-07-12), Sunny, high -24C/-11F, low -65C/-84F, pressure at 8.06 hPa, daylight 05:19-17:27


### Mars Facts

In [9]:
# URL of page to be scraped
url = 'http://space-facts.com/mars/'

In [10]:
tables = pd.read_html(url)
print(type(tables))
tables

<class 'list'>


[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [11]:
df = tables[0]
df.columns = ['Attribute','Value']
df.head(10)

Unnamed: 0,Attribute,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
html_table = df.to_html()
html_table.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Attribute</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronome

### Mars Hemispheres

In [13]:
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Retrieve page with the requests module
response = requests.get(url)

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [14]:
# results are returned as an iterable list
results = soup.find_all('a', class_="itemLink product-item")
link_texts = []
for result in results:
    link_text = result.find('h3').text.strip()
    link_texts.append(link_text)

link_texts

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [15]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [16]:
full_image_link = []
for link in link_texts:
    browser.visit(url)
    browser.click_link_by_partial_text(link)
    html = browser.html
    soup = BeautifulSoup(html, 'lxml')

    featured_image = soup.find('img', class_='wide-image')
    featured_image_url = 'https://astrogeology.usgs.gov'+featured_image['src']
    full_image_link.append(featured_image_url)
full_image_link

['https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg']

In [17]:
 # Append a Dictionary 

hemisphere_image_urls =[
    {'title': link_texts[0], 'img_url': full_image_link[0]},
    {'title': link_texts[0], 'img_url': full_image_link[1]},
    {'title': link_texts[0], 'img_url': full_image_link[2]},
    {'title': link_texts[0], 'img_url': full_image_link[3]}
]

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
