In [45]:
# Import dependencies

import os
from time import sleep
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

# NASA Mars News

Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [46]:
# Splinter/chromedriver
# Start a Chrome session through splinter. 'chromedriver.exe' is given as a
# bare relative name — presumably resolved from the notebook's working
# directory; confirm on the machine running this. headless=False asks for a
# visible browser window.

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [47]:
# Define the url and visit the url via splinter
# The query string asks for page 0 with 40 items per page, ordered by
# publish date (then creation date) descending.

article_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(article_url)

In [48]:
# Sleep to let page load
# Fixed one-second pause; nothing here verifies that loading has finished.

sleep(1)

In [49]:
# Navigate into the first article
# article_url orders the listing by publish date descending, so the first
# title link is the latest story. Picking it by position instead of by a
# hard-coded headline keeps this cell working once a newer article is posted.
# NOTE(review): the selector assumes the listing markup is
# ul.item_list > li.slide > div.content_title > a — verify if the site changes.

first_article_css = 'ul.item_list li.slide div.content_title a'
# Poll (up to 10 s) until the listing has rendered before trying to click.
browser.is_element_present_by_css(first_article_css, wait_time=10)
browser.find_by_css(first_article_css).first.click()

In [50]:
# Html and BeautifulSoup
# Name the parser explicitly: without it bs4 falls back to whichever parser
# is installed (and emits a warning), so results could differ between machines.

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [51]:
# Pull the news title
# Text content of the <h1 class="article_title"> heading on the article page.

news_title = soup.find('h1', {'class': 'article_title'}).get_text()

In [52]:
# Clean the line breaks from the title
# Split on newlines and glue the pieces back together with nothing in between.

news_title = ''.join(news_title.split('\n'))

In [53]:
# ID the div where the paragraphs (p) are
# The article body sits in the first <div class="wysiwyg_content">.

all_p = soup.find('div', {'class': 'wysiwyg_content'})

In [54]:
# Get the 1st paragraph
# Attribute access (.p) returns the first <p> descendant, same as find('p').

news_p = all_p.p.get_text()

# JPL Mars Space Images - Featured Image

* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

* Make sure to find the image url to the full size `.jpg` image.

* Make sure to save a complete url string for this image.

In [55]:
# Define the url and visit the url via splinter
# Note: this is the JPL site root, not the space-images page itself. The same
# variable is reused later as the base when building featured_image_url.

feat_image_url = 'https://www.jpl.nasa.gov'
browser.visit(feat_image_url)

In [56]:
# Sleep to let page load
# Fixed one-second pause before the next cell clicks on the page.

sleep(1)

In [57]:
# Select the "Images" link on the page to reveal an images button
# The link is matched on partial text, so any link containing 'Images' qualifies.

browser.click_link_by_partial_text('Images')

In [58]:
# Sleep to let the page load
# Fixed two-second pause before following the '/images' link.

sleep(2)

In [59]:
# Click to the images section
# The link is located by its href attribute ('/images') rather than its text.

browser.click_link_by_href('/images')

In [60]:
# Sleep to let the page load
# Fixed one-second pause before clicking 'FULL IMAGE'.

sleep(1)

In [61]:
# Click to show the full image
# Matches the link on the partial text 'FULL IMAGE'.

browser.click_link_by_partial_text('FULL IMAGE')

In [62]:
# Sleep to let the page load
# Fixed two-second pause before the 'more info' link is clicked.

sleep(2)

In [63]:
# Click "more info" to get to the biggest version of the image
# Match on partial text: the exact-text lookup only worked while the link text
# carried exactly five trailing spaces, which is an accident of the page markup.

browser.find_link_by_partial_text('more info').first.click()

In [64]:
# Sleep to let the page load
# Fixed one-second pause before the page HTML is captured.

sleep(1)

In [65]:
# Html and BeautifulSoup again since you changed pages
# Name the parser explicitly: without it bs4 falls back to whichever parser
# is installed (and emits a warning), so results could differ between machines.

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [66]:
# Sleep to let the page load
# NOTE(review): browser.html was already captured and parsed in the previous
# cell, so this pause cannot change what ended up in `soup`.

sleep(1)

In [67]:
# Find where the image is kept in the code
# The image link lives inside the first <figure class="lede"> element.

fig_img = soup.find('figure', {'class': 'lede'})

In [68]:
# Find the relative image path
# href of the first <a> inside the figure (attribute access == find('a')).

relative_image_path = fig_img.a["href"]

In [69]:
# Combine the base and relative url to get the image url
# urljoin handles a missing leading slash or an already-absolute href, where
# plain string concatenation would produce a malformed URL.

featured_image_url = urljoin(feat_image_url, relative_image_path)

# Mars Weather

* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`.

In [70]:
# Define the url and visit the url via splinter
# Note: the generic name `url` is reassigned again in the Mars Facts and
# Mars Hemispheres sections below.

url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [71]:
# Sleep to let page load
# Fixed one-second pause before the page HTML is captured.

sleep(1)

In [72]:
# Html and BeautifulSoup
# Name the parser explicitly: without it bs4 falls back to whichever parser
# is installed (and emits a warning), so results could differ between machines.

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [73]:
# Find the div of the first tweet
# find() returns the first match in document order.

first_tweet_div = soup.find('div', {'class': 'js-tweet-text-container'})

In [74]:
# Pull the text from the tweet
# Text of the first <p> inside the tweet container.

mars_weather = first_tweet_div.p.get_text()

In [75]:
# Remove the trailing picture link from the text
# Cut at the link marker instead of slicing off a fixed 26 characters: a tweet
# with no picture (or a link of a different length) would otherwise lose real
# weather text. If the marker is absent the text is left untouched.
# NOTE(review): assumes the picture link is rendered as 'pic.twitter.com/...'.

mars_weather = mars_weather.split('pic.twitter.com')[0]

In [76]:
# Clean up the breaks and replace them with commas
# Split on newlines, then re-join the pieces with ', ' as the separator.

mars_weather = ', '.join(mars_weather.split('\n'))

# Mars Facts

* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [77]:
# Define the url
# Only the address is set here; the page is fetched by pandas in the next
# cell rather than through the splinter browser.

url = 'https://space-facts.com/mars/'

In [78]:
# Pandas reads the tables on the site
# read_html returns a list of DataFrames; the next cell takes element 0.
tables = pd.read_html(url)

In [79]:
# Define the table wanted as df
# Work on a copy and rebind instead of mutating in place: an in-place
# set_index would also modify tables[0], so re-running this cell would fail
# when assigning two column names to a frame that has only one column left.

df = tables[0].copy()
df.columns = ['Description', 'Value']
df = df.set_index('Description')

In [80]:
# Transform the df to an html table
# to_html() with no arguments returns the markup as a string.

mars_html_table = df.to_html()

In [81]:
# Clean the breaks out of the html table
# Split on newlines and concatenate the pieces with no separator.

mars_html_table = ''.join(mars_html_table.split('\n'))

# Mars Hemispheres

* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [82]:
# Define the url and visit the url via splinter
# Search-results page; the query string filters on "hemisphere enhanced"
# with target Mars.

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [83]:
# Sleep to let page load
# Fixed one-second pause before the page HTML is captured.

sleep(1)

In [84]:
# Html and BeautifulSoup
# Name the parser explicitly: without it bs4 falls back to whichever parser
# is installed (and emits a warning), so results could differ between machines.

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [85]:
# Loop the page to get the hemisphere titles
# Every <h3> on the results page contributes its text as one title.

hemispheres = [heading.get_text() for heading in soup.find_all('h3')]

In [86]:
# Define an empty list to store the dictionaries of titles and image urls
# (one dictionary per title collected in `hemispheres`)

hemisphere_image_urls = []

# Loop the hemisphere titles and get the image url for each:
# open the hemisphere's detail page, read the first link in the "downloads"
# box, record it, then go back to the results page for the next title.

for hemisphere in hemispheres:

    # The title text doubles as the link text on the results page.
    browser.click_link_by_partial_text(hemisphere)

    sleep(2)

    # Re-parse after navigating. The parser is named explicitly so the result
    # does not depend on which optional parsers are installed.
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    div = soup.find('div', class_='downloads')

    # First <li> in the downloads box; its <a> href is the image url kept below.
    list_1 = div.find('li')

    pic_url = list_1.a['href']

    hemisphere_image_urls.append({'title': hemisphere, 'img_url': pic_url})

    sleep(2)

    # Return to the results page so the next title's link can be clicked.
    browser.back()

In [87]:
# Verify the list of dictionaries
# Each entry should carry the keys 'title' and 'img_url'.

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [89]:
# Gather everything scraped above into a single dictionary and display it.
mars_data = dict(
    news_title=news_title,
    news_p=news_p,
    featured_image_url=featured_image_url,
    mars_weather=mars_weather,
    mars_facts=mars_html_table,
    hemisphere_image_urls=hemisphere_image_urls,
)

mars_data

{'news_title': "Curiosity Tastes First Sample in 'Clay-Bearing Unit'",
 'news_p': 'Scientists working with NASA\'s Curiosity Mars rover have been excited to explore a region called "the clay-bearing unit" since before the spacecraft launched. Now, the rover has finally tasted its first sample from this part of Mount Sharp. Curiosity drilled a piece of bedrock nicknamed "Aberlady" on Saturday, April 6 (the 2,370th Martian day, or sol, of the mission), and delivered the sample to its internal mineralogy lab on Wednesday, April 10 (Sol 2374).',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19048_hires.jpg',
 'mars_weather': 'InSight sol 135 (2019-04-13) low -96.5ºC (-141.8ºF) high -16.6ºC (2.2ºF), winds from the SW at 4.2 m/s (9.4 mph) gusting to 11.3 m/s (25.3 mph), pressure at 7.30 hPa',
 'mars_facts': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Descripti

In [88]:
# Close the browser
# NOTE(review): this cell's execution counter (88) is lower than the one on
# the cell above it (89), so the saved outputs come from an out-of-order run;
# re-check with Restart & Run All.

browser.quit()