In [1]:
# Dependencies
import pandas as pd
import requests
import pymongo
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [31]:
# Start a Chrome session with a visible window (headless disabled), driven by `chromedriver.exe`
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## NASA Mars News
Collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Load the NASA Mars news listing in the shared browser session
nasa_url = "https://mars.nasa.gov/news/"
browser.visit(nasa_url)

In [4]:
# Capture the page as currently rendered by the browser and parse it
html = browser.html
soup = BeautifulSoup(html, features='html.parser')

# find() returns the first match; the first headline link and the first
# teaser block are treated as the latest article
news_title = (
    soup.find('div', class_='content_title')
    .find('a')
    .get_text()
)
news_p = soup.find('div', class_='article_teaser_body').get_text()

# Display article title and teaser text
print(f'Article: {news_title}')
print(f'Teaser: {news_p}')

Article: Robotic Toolkit Added to NASA's Mars 2020 Rover
Teaser: The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. 


## JPL Mars Space Images - Featured Image
Use splinter to find the image url for the current Featured Mars Image. 
Assign the url string to a variable called `featured_image_url`.
Make sure to find the image url to the full size `.jpg` image
Make sure to save a complete url string for this image

In [5]:
# Visit the JPL Mars space-images gallery through splinter.
# The page URL gets its own name so that `featured_image_url` only ever
# refers to the featured image itself, never to the gallery page.
jpl_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_images_url)

In [6]:
# JPL site root; the path extracted from the style attribute is appended to it
jpl_base_url = 'https://www.jpl.nasa.gov'

# HTML currently rendered by the browser
html_image = browser.html

# Parse HTML with Beautiful Soup
image_soup = BeautifulSoup(html_image, 'html.parser')

# The first <article> holds the image as an inline style of the form
#   background-image: url('<path>');
# Take whatever sits between "url(" and the next ")" and drop surrounding quotes.
article_style = image_soup.find('article')['style']
relative_image_path = article_style.split('url(', 1)[1].split(')', 1)[0].strip('\'" ')

# Store the complete URL (not just the site-relative path) so later cells
# can use `featured_image_url` directly
featured_image_url = jpl_base_url + relative_image_path

print(f'Featured Image URL: {featured_image_url}')

Featured Image URL: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA00069-1920x1200.jpg


## Mars Weather 
Scrape the latest Mars weather tweets. Save the tweet text for the weather report as a variable called `mars_weather`

In [7]:
# Open the Mars weather report Twitter timeline in the shared browser session
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

In [8]:
# Capture the timeline markup as currently rendered and parse it
weather_html = browser.html
tweet_soup = BeautifulSoup(weather_html, features='html.parser')

# find() returns only the first paragraph carrying the tweet-text classes;
# that first match is used as the latest weather report
weather_tweet = tweet_soup.find(
    'p',
    class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text',
).get_text()

print(f'Latest Tweet about Mars Weather: "{weather_tweet}"')

Latest Tweet about Mars Weather: "InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)
winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)
pressure at 7.70 hPapic.twitter.com/MhPPOHJg3m"


## Mars Facts
Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc. Use Pandas to convert the data to an HTML table string.

In [9]:
# Scrape the table of Mars facts
facts_url = 'https://space-facts.com/mars/'

# Use pandas' `read_html` to parse every <table> on the page into a DataFrame
tables = pd.read_html(facts_url)

# Work on a copy of the first table so the parsed original stays untouched
df = tables[0].copy()

# Rename columns (the first table is expected to have three columns)
df.columns = ['Fact_Category', 'Mars_Value', 'Earth_Value']

# Keep only the Mars values and index by fact category.
# `set_index` without `inplace` returns a new frame, which avoids mutating a
# slice of `df` (the pattern that triggers SettingWithCopyWarning).
facts_df = df[['Fact_Category', 'Mars_Value']].set_index('Fact_Category')

# Preview the dataframe
facts_df.head()

Unnamed: 0_level_0,Mars_Value
Fact_Category,Unnamed: 1_level_1
Diameter:,"6,779 km"
Mass:,6.39 × 10^23 kg
Moons:,2
Distance from Sun:,"227,943,824 km"
Length of Year:,687 Earth days


In [10]:
# Convert the facts dataframe to an HTML table string and keep it in
# `mars_facts`, the name the MongoDB document reads later in the notebook
mars_facts = facts_df.to_html()
mars_facts

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars_Value</th>\n    </tr>\n    <tr>\n      <th>Fact_Category</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n  </tbody>\n</table>'

## Mars Hemispheres
Obtain high resolution images for each of Mars's hemispheres. You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
* Save the image url string for the full resolution hemisphere image
* Save the Hemisphere title containing the hemisphere name
* Use a Python dictionary to store the data using the keys `img_url` and `title`. 
* Append the dictionary with the image url string and the hemisphere title to a list. 
This list will contain one dictionary for each hemisphere

In [32]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere products
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

In [33]:
# Parse the search-results page currently loaded in the browser
html_hemi = browser.html
hemi_soup = BeautifulSoup(html_hemi, 'html.parser')

# Retrieve all items that contain Mars hemisphere information
items = hemi_soup.find_all('div', class_='item')

# One {"title": ..., "img_url": ...} dict is appended per hemisphere
hemisphere_image_urls = []

# Site root; the hrefs and image srcs scraped below are appended to it
hemispheres_main_url = 'https://astrogeology.usgs.gov'

for item in items:
    # Hemisphere title as shown on the result card
    title = item.find('h3').text

    # Link from the result card to the hemisphere's own detail page
    partial_img_url = item.find('a', class_='itemLink product-item')['href']

    # Load the detail page; this navigates the shared browser away from the results page
    browser.visit(hemispheres_main_url + partial_img_url)
    partial_img_html = browser.html

    # Parse the detail page under its own name so the `soup` object created
    # in the news section is not overwritten
    detail_soup = BeautifulSoup(partial_img_html, 'html.parser')

    # Full image source taken from the <img class="wide-image"> element
    img_url = hemispheres_main_url + detail_soup.find('img', class_='wide-image')['src']

    hemisphere_image_urls.append({"title": title, "img_url": img_url})


In [34]:
# Show the collected list as the cell output: one dict per hemisphere,
# each with the keys `title` and `img_url`
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

# Import Data into PyMongo

In [None]:
        # Setup connection to mongodb
        conn = "mongodb://localhost:27017"
        client = pymongo.MongoClient(conn)

        # Select database and collection to use.
        # The database must come from the client; `db` does not exist before this line.
        db = client.mars
        collection = db.mars_info

        # Gather everything scraped above into a single document.
        # The facts table is converted to HTML here so this cell does not rely
        # on a separately assigned variable.
        mars_document = {
            'news_title': news_title,
            'news_p': news_p,
            'featured_image_url': featured_image_url,
            'weather_tweet': weather_tweet,
            'mars_facts': facts_df.to_html(),
            'hemisphere_image_urls': hemisphere_image_urls,
        }

        # Only one document is stored, so insert_one is used instead of insert_many
        collection.insert_one(mars_document)
        print("Data Uploaded!")