In [97]:
import shutil
import time
from difflib import SequenceMatcher

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from splinter import Browser

## Scraping NASA Website for Recent Mars Story

In [2]:
# NASA Mars news listing; the query string is split per parameter for readability.
article_url = (
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=40'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search='
    '&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)

In [3]:
# Fetch the listing page. The timeout (seconds) stops a stalled connection
# from blocking the kernel indefinitely; requests has no default timeout.
response = requests.get(article_url, timeout=30)

In [4]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [5]:
# Preview only the head of the document; printing the entire page floods
# the notebook and buries the cells that follow.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<!--[if lte IE 9]> <p class="browsehappy">You are using an <strong>outdated</strong> browser. Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</p> <![endif]-->
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <script type="text/javascript">
   window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"5e33925808","applicationID":"59562082","transactionName":"JVcPR0MLWApSRU1eAQVVEhxSC1oSUlkWbBMHXwRAHhdcCUA=","queueTime":0,"applicationTime":163,"agent":""}
  </script>
  <script type="text/javascript">
   (window.NREUM||(NREUM={})).loader_config={xpid:"VQcPUlZTDxAFXVRUBQEPVA=="};window.NREUM||(NREUM={}),__nr_require=function(t,n,e){function r

### News Title Function

In [6]:
# Every headline on the listing page sits in a <div class="content_title">.
title_results = soup.find_all('div', attrs={'class': "content_title"})

In [7]:
# Reduce each matched element to its whitespace-trimmed text.
news_titles = [title_tag.text.strip() for title_tag in title_results]

In [8]:
# Display the scraped headlines, in page order.
news_titles

['NASA Invests in Visionary Technology',
 'NASA is Ready to Study the Heart of Mars',
 'NASA Briefing on First Mission to Study Mars Interior',
 "New 'AR' Mobile App Features 3-D NASA Spacecraft",
 'Witness First Mars Launch from West Coast',
 'NASA InSight Mission to Mars Arrives at Launch Site']

In [9]:
# The first entry is the top story on the page (raises IndexError if nothing was scraped).
news_title = news_titles[0]
news_title

'NASA Invests in Visionary Technology'

In [10]:
def find_latest_news_title(article_url):
    """Return the title of the first news article listed at ``article_url``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The title is the whitespace-stripped text of the first
    ``<div class="content_title">`` in document order, so it is the *latest*
    article only when the page lists its newest story first.

    Parameters
    ----------
    article_url : str
        URL of the news listing page to scrape.

    Returns
    -------
    str
        Text of the first title element on the page.

    Raises
    ------
    IndexError
        If the page contains no ``content_title`` element.
    requests.exceptions.RequestException
        If the request fails or does not answer within the timeout.
    """
    # Without a timeout a stalled connection would hang the kernel forever.
    response = requests.get(article_url, timeout=30)

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first match is used, so there is no need to collect them all.
    first_title = soup.find('div', class_="content_title")
    if first_title is None:
        # Same exception type as indexing an empty list, but self-explanatory.
        raise IndexError(
            'no <div class="content_title"> found at {}'.format(article_url)
        )

    return first_title.text.strip()

In [11]:
# Sanity check: the helper should reproduce the first title scraped in the cells above.
find_latest_news_title(article_url)

'NASA Invests in Visionary Technology'

### News Article Description Function

In [12]:
# Teaser text for each article sits in a <div class="rollover_description_inner">.
description_results = soup.find_all('div', attrs={'class': "rollover_description_inner"})

In [13]:
# Reduce each matched element to its whitespace-trimmed text.
news_descriptions = [teaser_tag.text.strip() for teaser_tag in description_results]

In [14]:
# Display the scraped teaser paragraphs, in page order.
news_descriptions

['NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.',
 'NASA is about to go on a journey to study the center of Mars.',
 'NASA’s next mission to Mars will be the topic of a media briefing Thursday, March 29, at JPL. The briefing will air live on NASA Television and the agency’s website.',
 "NASA spacecraft travel to far-off destinations in space, but a new mobile app produced by NASA's Jet Propulsion Laboratory, Pasadena, California, brings spacecraft to users.",
 "NASA invites digital creators to apply for social media credentials to cover the launch of the InSight mission to Mars, May 3-5, at California's Vandenberg Air Force Base.",
 "NASA's InSight spacecraft has arrived at Vandenberg Air Force Base in central California to begin final preparations for a launch this May."]

In [15]:
# The first entry belongs to the top story (raises IndexError if nothing was scraped).
news_description = news_descriptions[0]
news_description

'NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.'

In [16]:
def find_latest_news_description(article_url):
    """Return the teaser text of the first news article listed at ``article_url``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The description is the whitespace-stripped text of the first
    ``<div class="rollover_description_inner">`` in document order, so it
    belongs to the *latest* article only when the page lists its newest
    story first.

    Parameters
    ----------
    article_url : str
        URL of the news listing page to scrape.

    Returns
    -------
    str
        Text of the first description element on the page.

    Raises
    ------
    IndexError
        If the page contains no ``rollover_description_inner`` element.
    requests.exceptions.RequestException
        If the request fails or does not answer within the timeout.
    """
    # Without a timeout a stalled connection would hang the kernel forever.
    response = requests.get(article_url, timeout=30)

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first match is used, so there is no need to collect them all.
    first_description = soup.find('div', class_="rollover_description_inner")
    if first_description is None:
        # Same exception type as indexing an empty list, but self-explanatory.
        raise IndexError(
            'no <div class="rollover_description_inner"> found at {}'.format(article_url)
        )

    return first_description.text.strip()

In [17]:
# Sanity check: the helper should reproduce the first description scraped in the cells above.
find_latest_news_description(article_url)

'NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.'

### JPL Mars Space Images - Featured Image

In [18]:
# JPL space-images gallery, filtered to the Mars category via the query string.
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [19]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the previously hard-coded location if it is not on PATH.
executable_path = {'executable_path': shutil.which('chromedriver') or '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(image_url)

/usr/local/bin/chromedriver


In [20]:
def find_feature_image(html=None):
    """Return the absolute URL of the featured image on the JPL images page.

    The featured image is taken from the first ``<article class="carousel_item">``
    in the page: the ``data-fancybox-href`` attribute of its first ``<a>`` holds
    a site-relative path, which is prefixed with ``https://www.jpl.nasa.gov``.

    Parameters
    ----------
    html : str, optional
        Page source to parse. When omitted, the source currently loaded in the
        module-level ``browser`` is used, so that browser must already be
        showing the images page.

    Returns
    -------
    str
        Absolute URL of the featured image.

    Raises
    ------
    IndexError
        If the page has no carousel item, or its link carries no
        ``data-fancybox-href`` attribute.
    """
    if html is None:
        html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Read the attribute straight from the parsed tag. Splitting the tag's
    # string form on spaces and fuzzy-matching the pieces breaks as soon as an
    # attribute value contains a space or another attribute looks similar.
    first_item = soup.find('article', class_='carousel_item')
    anchor = first_item.a if first_item is not None else None
    relative_path = anchor.get('data-fancybox-href') if anchor is not None else None

    if not relative_path:
        raise IndexError('no carousel item with a data-fancybox-href link found')

    beg_url = 'https://www.jpl.nasa.gov'

    return beg_url + relative_path

In [21]:
# Reads the page currently loaded in `browser`, so the JPL images page must be open.
find_feature_image()

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17794_ip.jpg'

### Mars Weather Function

In [28]:
# Download the Mars weather account's timeline and isolate its timeline container(s).
twitter_url = "https://twitter.com/marswxreport?lang=en"
response = requests.get(url=twitter_url)
soup = BeautifulSoup(markup=response.text, features='html.parser')
# Calling a tag is shorthand for find_all() on it.
twitter_results = soup.body('div', class_="timeline")

In [37]:
# Flatten every left-to-right text block of every timeline into one list of strings.
recent_tweets = [
    tweet_body.text.strip()
    for timeline in twitter_results
    for tweet_body in timeline.find_all('div', class_="dir-ltr")
]

In [36]:
# First text block on the timeline (raises IndexError if nothing was scraped).
most_recent_weather_tweet = recent_tweets[0]

In [41]:
def find_most_recent_weather_tweet(twitter_url):
    """Return the text of the first tweet on the timeline at ``twitter_url``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The result is the whitespace-stripped text of the first
    ``<div class="dir-ltr">`` found inside a ``<div class="timeline">``.
    No check is made that the tweet is actually a weather report; the
    function relies on the account's first tweet being one.

    Parameters
    ----------
    twitter_url : str
        URL of the Twitter timeline to scrape.

    Returns
    -------
    str
        Text of the first tweet on the timeline.

    Raises
    ------
    IndexError
        If the page contains no tweet text (including when it has no
        ``<body>`` or no timeline container).
    requests.exceptions.RequestException
        If the request fails or does not answer within the timeout.
    """
    # Without a timeout a stalled connection would hang the kernel forever.
    response = requests.get(twitter_url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # A response without <body> simply has no timelines to search.
    body = soup.body
    twitter_results = body.find_all('div', class_="timeline") if body is not None else []

    # Return as soon as the first tweet is found instead of collecting them all.
    for timeline in twitter_results:
        first_tweet = timeline.find('div', class_="dir-ltr")
        if first_tweet is not None:
            return first_tweet.text.strip()

    raise IndexError('no tweet text found at {}'.format(twitter_url))

In [42]:
# Run the helper against the Mars weather account's timeline.
twitter_url = "https://twitter.com/marswxreport?lang=en"
find_most_recent_weather_tweet(twitter_url)

'Sol 2054 (May 17, 2018), Sunny, high 4C/39F, low -72C/-97F, pressure at 7.40 hPa, daylight 05:21-17:20'

### Mars Facts

In [45]:
# Page whose first HTML table is read into the Mars facts DataFrame below.
mars_url = "https://space-facts.com/mars/"

In [60]:
# read_html returns one DataFrame per <table> on the page; the first one is used.
mars_pd = pd.read_html(mars_url)
initial_mars_df = mars_pd[0]
# The scraped table has positional column labels 0 and 1; give them readable names.
renamed_mars_df = initial_mars_df.rename(
    {0: 'Scientific Measures', 1: 'Values'},
    axis='columns',
)
mars_df = renamed_mars_df.set_index(keys='Scientific Measures')

In [64]:
# With no target given, to_html() returns the table markup as a string.
mars_html_table = mars_df.to_html()
mars_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Values</th>\n    </tr>\n    <tr>\n      <th>Scientific Measures</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n

In [65]:
# NOTE: str.replace returns a new string; the result is only displayed here,
# so mars_html_table itself still contains the newline characters.
mars_html_table.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Values</th>    </tr>    <tr>      <th>Scientific Measures</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [None]:
# Given a path, to_html() writes the markup to that file instead of returning it.
mars_df.to_html('mars_table.html')

### Mars Hemispheres

In [127]:
# Reuse the Chrome session opened for the featured-image step. Constructing a
# second Browser here would leave the first window and its chromedriver
# process running with nothing left to close them.
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

/usr/local/bin/chromedriver


In [128]:
# Snapshot the page the browser is currently showing and parse it.
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [129]:
# Each search result has a <div class="description"> whose <h3> holds its title.
# Calling a tag is shorthand for find_all() on it.
description_class = soup('div', class_='description')

# Despite the name, this list holds the hemisphere titles, not image URLs.
hemisphere_images = [result_block.find('h3').text for result_block in description_class]

In [130]:
# Display the hemisphere titles collected from the search results.
hemisphere_images

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [133]:
# Cerberus Hemisphere Enhanced
start_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

for hemispheres_url in hemisphere_images:
    browser.visit(cerberus_url)
    cerberus_image = soup.body.find_all('div', class_='container')
    cerberus_image

[<div class="container">
 <form action="/search/results" class="bar widget block" id="search-bar">
 <input name="q" type="hidden" value="hemisphere-enhanced"/>
 <input name="target" type="hidden" value="Mars"/><input name="__ncforminfo" type="hidden" value="A47it6D6xZjBRJ2EhF67-te0N6nnvtQ_Qj9PWnmtos9gJToPci9PpIqhqKI5UjVKzebez9GtyG-GlFt0ARG1rj0rSS7w_7wTLu_UuYJBQDL3l97yCw0-QQ=="/></form><div class="full-content"> <section class="block" id="results-accordian">
 <div class="result-list" data-section="product" id="product-section">
 <div class="accordian">
 <h2>Products</h2>
 <span class="count">4 Results</span>
 <span class="collapse">Collapse</span>
 </div>
 <div class="collapsible results">
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink pro

In [None]:
# Target element on a hemisphere detail page (its src is the full-size image):
# <img class="wide-image" src="/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg">