In [1]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import pymongo
import time

In [2]:
def exec_path():
    executable_path = {'executable_path': '\\Users\\Paul-DS\\Downloads\\chromedriver.exe'}
    return executable_path

In [3]:
def open_Chrome():
    browser = Browser('chrome', **exec_path(), headless=False)
    return browser

In [4]:
# URL Dictionary Menu:
# 1: NASA Mars News
# 2: JPL Mars Space Images - Featured Image
# 3: Mars Weather
# 4: Mars Facts
# 5: Mars Hemispheres

In [5]:
url = {
    1: 'https://mars.nasa.gov/news/',
    2: 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars',
    3: 'https://twitter.com/marswxreport?lang=en',
    4: 'https://space-facts.com/mars/',
    5: 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
}

### NASA Mars News

In [6]:
browser = open_Chrome()

# Retrieve 'NASA Mars News' page with splinter module
browser.visit(url[1])
print(url[1])

# Create BeautifulSoup object; parse with html
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

https://mars.nasa.gov/news/


In [7]:
# Extract title text
title = soup.title.text
print(title)

News  – NASA’s Mars Exploration Program 


In [9]:
# Examine the results, then determine element that contains news info
# results are returned as an iterable list
nasa_mars_news = []
results = soup.find_all('div', class_="image_and_description_container")
for result in results:
    mars_news = {}
    news_link=result.find('a')
    #print(news_link.text)
    news_summary=news_link.find('div', class_="rollover_description_inner").text.strip('\n')
    try:
        news_title=news_link.h3.text
    except AttributeError:  # When H3 tag not available then use Alt tag
        news_title_img=news_link.find_all('img',alt=True)
        news_title=news_title_img[1]['alt'] 
    mars_news[news_title]=news_summary
    nasa_mars_news.append(mars_news)

browser.quit()

In [10]:
nasa_mars_news

[{'NASA to Host Media Call on Next Mars Landing Site': 'NASA will host a media teleconference at 9 a.m. PST (noon EST) Monday, Nov. 19, to provide details about the Mars 2020 rover’s landing site on the Red Planet.'},
 {'How NASA Will Know When InSight Touches Down': 'On Nov. 26, engineers will look for a combination of signals to determine whether the next spacecraft to Mars lands safely.'},
 {'NASA Brings Mars Landing to Viewers Everywhere': "NASA's InSight lander is scheduled to touch down on the Red Planet at approximately noon PST  on Nov. 26, with a new suite of instruments to probe below the Martian surface."},
 {'Curiosity on the Move Again': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."},
 {'The Mars InSight Landing Site Is Just Plain Perfect': 'If the InSight landing zone were ice cream, it would be vanilla.'},
 {"Five Things to Know About InSight's Mars Landing": "NASA engineers

### JPL Mars Space Images - Featured Image

In [11]:
browser = open_Chrome()

# Retrieve 'JPL Mars Space Images - Featured Image' page with splinter module
browser.visit(url[2])
print(url[2])

# Create BeautifulSoup object; parse with html
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
time.sleep(1)

https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars


In [12]:
soup.find('footer')

<footer>
<a class="button fancybox" data-description="This composite is a mosaic comprising four individual Rosetta NAVCAM images taken from 19 miles from the center of comet 67P/Churyumov-Gerasimenko." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA18899_ip.jpg" data-link="/spaceimages/details.php?id=PIA18899" data-title="Rosetta Comet" id="full_image">
					FULL IMAGE
				  </a>
</footer>

In [13]:
browser.click_link_by_id('full_image')

In [14]:
soup=BeautifulSoup(browser.html,'html.parser')

In [15]:
soup.find_all('a', class_='button')

[<a class="button fancybox" data-description="This composite is a mosaic comprising four individual Rosetta NAVCAM images taken from 19 miles from the center of comet 67P/Churyumov-Gerasimenko." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA18899_ip.jpg" data-link="/spaceimages/details.php?id=PIA18899" data-title="Rosetta Comet" id="full_image">
 					FULL IMAGE
 				  </a>, <a class="button" href="">
 				MORE
 			  </a>, <a class="button" href="/spaceimages/details.php?id=PIA18899 " target="_top">more info     </a>]

In [16]:
try:
    time.sleep(5)
    browser.click_link_by_partial_text('more info')
    print('clicked more info button')
except:
    time.sleep(5)
    browser.click_link_by_partial_href('/spaceimages/details.php?id=')
    print('clicked href link')

clicked more info button


In [17]:
browser.click_link_by_partial_href('/spaceimages/images/largesize')

In [18]:
nasa_mars_featured_image_url=browser.url
print(nasa_mars_featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg


In [19]:
# (Re)Create dict()
nasa_mars_featured_image={}
nasa_mars_featured_image["nasa_mars_featured_image_url"]=nasa_mars_featured_image_url

In [20]:
nasa_mars_featured_image

{'nasa_mars_featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg'}

In [21]:
browser.quit()

### Mars Weather

In [22]:
browser = open_Chrome()

# Retrieve 'Mars Weather' twitter page with splinter module
browser.visit(url[3])
print(url[3])
# Create BeautifulSoup object; parse with html
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
time.sleep(1)

https://twitter.com/marswxreport?lang=en


In [23]:
# Scrape the latest Mars weather tweet from the page. 
# Save the tweet text for the weather report as a variable called `mars_weather`.

# Get first Top tweet on twitter page
mars_weather = soup.find('div', class_="js-tweet-text-container").p.text
print(mars_weather)

Sol 2229 (2018-11-13), high -2C/28F, low -71C/-95F, pressure at 8.62 hPa, daylight 06:22-18:39


In [24]:
twitter_mars_weather={}
twitter_mars_weather["mars_weather"]=mars_weather

In [25]:
twitter_mars_weather

{'mars_weather': 'Sol 2229 (2018-11-13), high -2C/28F, low -71C/-95F, pressure at 8.62 hPa, daylight 06:22-18:39'}

In [26]:
browser.quit()

### Mars Hemispheres

In [27]:
browser = open_Chrome()

# Retrieve 'Mars Hemispheres' page with splinter module
browser.visit(url[5])
print(url[5])

# Create BeautifulSoup object; parse with html
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
time.sleep(1)

https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars


In [28]:
# Obtain high resolution images for each of Mar's hemispheres.

# Navigate the site and find then click each of the links to the hemispheres in 
# order to find the image url to the full resolution image.
results = soup.find_all('div', class_="item")
nasa_mars_hemisphere_image_urls=[]
for result in results:
    title_image_url={}
    href_string=result.find('div',class_='description').a.string
    print(href_string)
    try:
        browser.click_link_by_partial_text(href_string)
        html=browser.html
        soup=BeautifulSoup(html, 'html.parser')
        download_url=soup.find_all('div',class_='downloads')
        #print(download_url)
        for download in download_url:
            #print(download.find('a').text)
            image_url=download.a['href']
            print(image_url)
        title_image_url['title']=href_string
        title_image_url['image_url']=image_url
        nasa_mars_hemisphere_image_urls.append(title_image_url)
    # Go back to initial page with splinter module to click on next div item
        browser.visit(url[5])
    except ElementDoesNotExist:
        print("Scraping Complete")
browser.quit()
print(nasa_mars_hemisphere_image_urls)

Cerberus Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
Schiaparelli Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
Syrtis Major Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
Valles Marineris Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Val

### Mars Facts

In [29]:
import pandas as pd

In [30]:
mars_space_facts_url = url[4]

In [43]:
tables = pd.read_html(mars_space_facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [32]:
type(tables)

list

In [37]:
mars_facts={}
mars_facts["table"]=tables

In [38]:
mars_facts

{'table': [                                        Measurement
  Metric                                             
  Equatorial Diameter:                       6,792 km
  Polar Diameter:                            6,752 km
  Mass:                 6.42 x 10^23 kg (10.7% Earth)
  Moons:                          2 (Phobos & Deimos)
  Orbit Distance:            227,943,824 km (1.52 AU)
  Orbit Period:                  687 days (1.9 years)
  Surface Temperature:                  -153 to 20 °C
  First Record:                     2nd millennium BC
  Recorded By:                   Egyptian astronomers]}

In [44]:
df=tables[0]
df.columns=['Metric','Measurement']
df

Unnamed: 0,Metric,Measurement
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [34]:
df.set_index('Metric',inplace=True)

In [35]:
df.index

Index(['Equatorial Diameter:', 'Polar Diameter:', 'Mass:', 'Moons:',
       'Orbit Distance:', 'Orbit Period:', 'Surface Temperature:',
       'First Record:', 'Recorded By:'],
      dtype='object', name='Metric')

In [36]:
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n    </tr>\n    <tr>\n      <th>Metric</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr

In [46]:
html_table=html_table.replace('\n', '')

In [47]:
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Measurement</th>    </tr>    <tr>      <th>Metric</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [48]:
type(html_table)

str

In [49]:
mars_fact_html_table={}

In [50]:
mars_fact_html_table["html_table"]=html_table

In [51]:
df.to_html('table.html')

In [52]:
mars_facts={}
mars_facts["table_url"]="table.html"

In [53]:
mars_facts

{'table_url': 'table.html'}

## Step 2 - MongoDB and Flask Application

In [54]:
# Establish a connection to MongoDB with PyMongo you use the MongoClient class
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [55]:
# create a database object referencing a new database, called 'nasa_mars_landing_db'
db = client.nasa_mars_landing_db

In [56]:
# List of dictionaries and assigned variables from above:
#print(nasa_mars_featured_image_url)
#print(mars_weather)
#print(html_table)
#print(nasa_mars_hemisphere_image_urls)
#print(nasa_mars_news)


### JPL Mars Space Images - Featured Image

In [57]:
# Drop/Create 'table_nasa_mars_featured_image'
# then insert 'nasa_mars_featured_image' into database document collections table
db.table_nasa_mars_featured_image.drop()
db.table_nasa_mars_featured_image.insert_one(nasa_mars_featured_image)

<pymongo.results.InsertOneResult at 0x26a0699f448>

In [61]:
list(db.table_nasa_mars_featured_image.find())

[{'_id': ObjectId('5bef7d7a20276f0890ed336c'),
  'nasa_mars_featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18899_hires.jpg'}]

### Mars Weather

In [59]:
# Drop/Create 'table_twitter_mars_weather'
# then insert 'twitter_mars_weather' into database document collections table
db.table_twitter_mars_weather.drop()
db.table_twitter_mars_weather.insert_one(twitter_mars_weather)

<pymongo.results.InsertOneResult at 0x26a05c2e808>

In [60]:
list(db.table_twitter_mars_weather.find())

[{'_id': ObjectId('5bef7e0f20276f0890ed336d'),
  'mars_weather': 'Sol 2229 (2018-11-13), high -2C/28F, low -71C/-95F, pressure at 8.62 hPa, daylight 06:22-18:39'}]

### Mars Facts

In [62]:
db.table_mars_facts.drop()
db.table_mars_facts.insert_one(mars_facts)

<pymongo.results.InsertOneResult at 0x26a070547c8>

In [63]:
list(db.table_mars_facts.find())

[{'_id': ObjectId('5bef7e4c20276f0890ed336e'), 'table_url': 'table.html'}]

### Mars Hemispheres

In [64]:
# Drop/Create 'table_nasa_mars_hemisphere_image_urls'
# then insert 'nasa_mars_hemisphere_image_urls' into database document collections table
db.table_nasa_mars_hemisphere_image_urls.drop()
db.table_nasa_mars_hemisphere_image_urls.insert_many(nasa_mars_hemisphere_image_urls)

<pymongo.results.InsertManyResult at 0x26a05a62ec8>

In [65]:
list(db.table_nasa_mars_hemisphere_image_urls.find())

[{'_id': ObjectId('5bef7e5520276f0890ed336f'),
  'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'_id': ObjectId('5bef7e5520276f0890ed3370'),
  'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'_id': ObjectId('5bef7e5520276f0890ed3371'),
  'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'_id': ObjectId('5bef7e5520276f0890ed3372'),
  'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

### NASA Mars News

In [66]:
# Drop/Create 'table_nasa_news' (if it does not exist) then insert 'nasa_mars_news' 
# into database document collections table
db.table_nasa_mars_news.drop()
db.table_nasa_mars_news.insert_many(nasa_mars_news)

<pymongo.results.InsertManyResult at 0x26a05a62f08>

In [67]:
list(db.table_nasa_mars_news.find())

[{'_id': ObjectId('5bef7e7c20276f0890ed3373'),
  'NASA to Host Media Call on Next Mars Landing Site': 'NASA will host a media teleconference at 9 a.m. PST (noon EST) Monday, Nov. 19, to provide details about the Mars 2020 rover’s landing site on the Red Planet.'},
 {'_id': ObjectId('5bef7e7c20276f0890ed3374'),
  'How NASA Will Know When InSight Touches Down': 'On Nov. 26, engineers will look for a combination of signals to determine whether the next spacecraft to Mars lands safely.'},
 {'_id': ObjectId('5bef7e7c20276f0890ed3375'),
  'NASA Brings Mars Landing to Viewers Everywhere': "NASA's InSight lander is scheduled to touch down on the Red Planet at approximately noon PST  on Nov. 26, with a new suite of instruments to probe below the Martian surface."},
 {'_id': ObjectId('5bef7e7c20276f0890ed3376'),
  'Curiosity on the Move Again': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."},
 {'_id'

In [74]:
db.list_collection_names()

['table_twitter_mars_weather',
 'table_nasa_mars_featured_image',
 'collection',
 'table_nasa_mars_hemisphere_image_urls',
 'table_mars_facts',
 'table_nasa_mars_news']

In [69]:
client.list_database_names()

['ClassDB',
 'admin',
 'config',
 'dumpster_db',
 'fruitDB',
 'local',
 'nasa_mars_landing_db',
 'store_inventory',
 'traveldb']

In [70]:
if 'nasa_mars_landing_db' in client.list_database_names():
  print("The database exists.")

The database exists.


In [77]:
!jupyter nbconvert --to=python mission_to_mars-Copy1.ipynb

[NbConvertApp] Converting notebook mission_to_mars-Copy1.ipynb to python
[NbConvertApp] Writing 8443 bytes to mission_to_mars-Copy1.py
