In [9]:
!pip install lxml



In [10]:
# Import Dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import requests as req
import re
import pandas as pd
import pymongo
from selenium import webdriver
import time
import os

In [11]:
# Use chromedriver and splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

<b>Nasa Mars News</b>
- Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.
- https://mars.nasa.gov/news/



In [None]:
# Visit the NASA Mars News Site
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)
response = requests.get(news_url)
response.headers

In [86]:
# Create an HTML object
soup = bs(response.text, 'html.parser')
# print(soup.prettify())

In [87]:
# Extract lastest news title and paragraph text
news_title = soup.find('div', class_='content_title').a.text.strip()
news_p = soup.find('div', class_='rollover_description_inner').text.strip()

print(news_title)
print("-------------------")
print(news_p)

NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities
-------------------
Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.


<b>JPL Mars Space Image</b>
- Visit (https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars) and scrape the most recent images of Mars
- Use splinter to navigate the site and find the image URL for the current Featured Mars Image and assign the url string to a variable called featured_image_url
- Find the image url to the full size .jpg image
- Save a complete URL string for this image

In [38]:
# Tell the browser to visit the below url
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)

In [39]:
# HTML object and parse with Beautiful Soup
html_image = browser.html
soup = bs(html_image, "html.parser")

In [41]:
# Get background image url using style tag
image_url = soup.find('article')['style'].replace('background-image: url(','').replace(');', '')[1:-1]

In [42]:
# Connect image url to the website's main url
main_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
featured_image_url = main_url + image_url

In [43]:
# Display link the the featured_image_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/spaceimages/images/wallpaper/PIA18614-1920x1200.jpg'

<b>Mars Facts</b>
- Visit the Mars Facts webpage (https://space-facts.com/mars/) and use Pandas to Scrape the table containing facts about the planet including Diameter, Mass, etc
- Use Pandas to convert the data to a HTML table string

In [44]:
# URL to the Mars Facts webpage
facts_url = 'https://space-facts.com/mars/'
browser.visit(facts_url)

In [45]:
# Use Pandas to read the HTML table on the page and make into a list of dictionaries
tables = pd.read_html(facts_url)
df = tables[1]

In [47]:
# Assign columns and show esarth to mars comparison
df.columns = ['Mars - Earth Comaprison', 'Mars', 'Earth']
html_table = df.to_html(table_id="tablepress-p-mars", justify="left", index=False)
df.to_dict(orient='records')
df

Unnamed: 0,Mars - Earth Comaprison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-87 to -5 °C,-88 to 58°C


<b>Mars Hemispheres</b>
- Visit the USGS Astrogeology site (https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's Hemispheres
- Click each of the links to the hemipsheres in order to find the image url to the full resolution image
- Save <b>both</b> the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using keys 'img_url' and 'title'
- Append the ditctionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [76]:
# Mars Hemisphere url...tell browser to go to the link
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)

In [77]:
# HTML Object and parse with Beautiful Soup
html = browser.html
soup = bs(html, 'html.parser')

In [78]:
# Find all items that contain mars hemispheres information
title_list = soup.find_all('div', class_='description')

In [82]:
# Create a list for hemisphere urls
hemisphere_image_urls = []

In [83]:
# Loop through the 'div' objects and scrape the titles and urls of images
for title in title_list: 
    # Navigate browser to page then click on title link to image page
    browser.visit(hemisphere_url)
    browser.click_link_by_partial_text(title.a.h3.text)
    
    # Grab the destination page html and make into BeautifulSoup Object
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    # Parse the imgage source(src) relative url and then append to domain name
    # for absolute url
    img_url_list = soup.find('img', class_='wide-image')
    image_url = f"https://astrogeology.usgs.gov{img_url_list['src']}"
    
    # Create Dictionary with returned values and add dict to hemi_image_urls list
    post = {
            'title': title.a.h3.text,
            'image_url': image_url
            }
    hemisphere_image_urls.append(post)

In [84]:
# Show Dictionary of image urls
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]