In [19]:
#import dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pymongo

In [35]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 87.0.4280
[WDM] - Get LATEST driver version for 87.0.4280
[WDM] - Driver [C:\Users\Steven\.wdm\drivers\chromedriver\win32\87.0.4280.88\chromedriver.exe] found in cache


 


# NASA Mars News

In [47]:
def mars_news():
    # URL of page to be scraped
    url = ('https://mars.nasa.gov/news/')

    # Retrieve page with the browser module
    browser.visit(url)

    html = browser.html
    news_soup = BeautifulSoup(html, 'html.parser')

    # save the most recent article, title and date
    article = news_soup.find("div", class_="list_text")
    news_p = article.find("div", class_="article_teaser_body").text
    news_title = article.find("div", class_="content_title").text
    news_date = article.find("div", class_="list_date").text
    print(news_date)
    print(news_title)
    print(news_p)
    
    return news_title, news_p

news_title, news_p

("A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. ")

# JPL Mars Space Images - Featured Image

In [52]:
def featured_image():

    #Use splinter to navigate the site and find the image url for the current Featured Mars Image
    # URL of page to be scraped
    url = ('https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')

    # Retrieve page with the browser module
    browser.visit(url)

    html = browser.html
    image_soup = BeautifulSoup(html, 'html.parser')
    
    #find the image url for the current Featured Mars Image
    image_url = image_soup.find("a", class_="fancybox")['data-fancybox-href']
    featured_image_url = 'https://www.jpl.nasa.gov' + image_url
    print(featured_image_url)
    
    return featured_image_url

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA23436_ip.jpg'

# Mars Facts

In [50]:
def mars_facts():
    
    # Visit the Mars facts webpage and scrape table data into Pandas
    url = "https://space-facts.com/mars/"
    browser.visit(url)
    
    # place data into a dataframe, clean it up and output it into an HTML table
    mars_data = pd.read_html(url)
    mars_data_df = mars_data[0]
    
    #rename columns and set index
    mars_data_df.columns=['Fact', 'Data']
    mars_data_df.set_index(['Fact'], inplace=True)
    mars_data_df
    
    #convert to html
    mars_data_html = mars_data_df.to_html(classes='table table-striped')
    #mars_data_html = mars_data_html.replace('\n', ' ')
    mars_data_html
    
    return mars_data_html

mars_data_html

'<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Data</th>\n    </tr>\n    <tr>\n      <th>Fact</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>

# Mars Hemispheres

In [34]:
#Visit the USGS Astrogeology site 
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
html = browser.html
hemisphere_soup = BeautifulSoup(html, 'html.parser')

In [14]:
#loop thru the links to the hemispheres to find the image url to the full resolution image
#add each 'title' and 'img_url' to a list
results = hemisphere_soup.find_all("div", class_="item")

hemisphere_list = []
for result in results:
    title = result.find("h3").text
    href = result.find("a", class_= 'itemLink')['href']
    img_url = 'https://astrogeology.usgs.gov' + href
    hemisphere_list.append({'title' : title, 'img_url' : img_url})
    
hemisphere_list

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'}]

# Mongo DB

In [62]:
def scrape_all():
    news_tite, news_p = mars_news()
    featured_img_url = featured_image()
    mars_facts_html = mars_facts()

    nasa_document = {
        'news_title': news_tite,
        'news_paragraph': news_p,
        'featured_img_url': featured_img_url,
        'mars_facts_html': mars_facts_html 
    }

    return nasa_document

print(nasa_document)


{'news_title': "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes", 'news_paragraph': "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. ", 'featured_img_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14934_ip.jpg', 'mars_facts_html': '<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Data</th>\n    </tr>\n    <tr>\n      <th>Fact</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td

In [64]:
# connect to mongodb
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# connect to mars_app database
db = client.mars_app

# connect to mars collection
mars = db.mars

# gather document to insert
data_document = scrape_all()

# insert
#mars.insert_one(data_document)

# upsert
mars.update_one({}, {'$set': data_document}, upsert=True)

December 22, 2020
A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes
Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. 
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18358_ip.jpg


<pymongo.results.UpdateResult at 0x22377452d00>