In [1]:
# import dependencies (standard library first, then third-party)
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from splinter import Browser

In [2]:
# Use Selenium to scrape the rendered HTML of NASA's Mars news page.
chromedriver = 'chromedriver.exe'
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver)
try:
    driver.get('https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest')
    # Fixed pause before reading the page source
    # (presumably to let the article list finish rendering - TODO confirm).
    time.sleep(5)
    html = driver.page_source
finally:
    # Always shut the Chrome session down, even if the page load fails;
    # otherwise the browser/driver processes are left running.
    # NOTE(review): no cell visible here uses `driver` afterwards - confirm
    # nothing further down the notebook relies on it staying open.
    driver.quit()
soup = BeautifulSoup(html, 'lxml')

In [3]:
# Look up the first title <div> and the first teaser <div> by CSS class,
# then print their text joined on one line.
newsTitle = soup.find('div', attrs={'class': 'content_title'})
title = newsTitle.text
newsTeaser = soup.find('div', attrs={'class': 'article_teaser_body'})
teaser = newsTeaser.text
print(" - ".join([title, teaser]))

NASA Sets Sights on May 5 Launch of InSight to Mars - NASA’s next mission to Mars, InSight, is scheduled to launch Saturday, May 5, on a first-ever mission to study the heart of the Red Planet.


In [4]:
# Start a Chrome session through splinter and open JPL's Mars space-images page.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
executable_path = dict(executable_path='./chromedriver')
browser = Browser('chrome', **executable_path)
browser.visit(url)

In [5]:
# Ask the splinter browser to click the link whose id is 'full_image' on the
# page it currently has open; the next cell reads the resulting HTML.
browser.click_link_by_id('full_image')

In [6]:
# Parse the browser's current HTML and pick out the first <img> element
# carrying the 'fancybox-image' class.
html = browser.html
soup = BeautifulSoup(html, features='lxml')
topImage = soup.find(name='img', attrs={'class': 'fancybox-image'})
topImage

<img class="fancybox-image" src="/spaceimages/images/mediumsize/PIA03519_ip.jpg" style="display: inline;"/>

In [7]:
# Prefix the image's site-relative src with the JPL host to get a full URL,
# then print it with a label.
topImageUrl = "".join(["https://www.jpl.nasa.gov", topImage['src']])
print("Featured Image URL: ", topImageUrl, sep="")

Featured Image URL: https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA03519_ip.jpg


In [8]:
# Open the Mars weather account's Twitter page and read the text of the first
# <p> element whose class string matches the tweet-text classes below.
browser.visit("https://twitter.com/marswxreport?lang=en")
html = browser.html
soup = BeautifulSoup(html, 'lxml')
tweetClasses = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
latestTweet = soup.find('p', class_=tweetClasses)
latestWeather = latestTweet.text
latestWeather

'Sol 2033 (April 25, 2018), Sunny, high -10C/14F, low -71C/-95F, pressure at 7.23 hPa, daylight 05:24-17:20'

In [10]:
# Read the HTML tables from the Mars facts page; the result is a list of
# DataFrames (one per table found), displayed below for inspection.
marsTables = pd.read_html("https://space-facts.com/mars/")
marsTables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [11]:
# Build the stats DataFrame from the first scraped table and save it as HTML.
# Work on a copy with a non-inplace set_index: mutating marsTables[0] in place
# made the cell fail on re-run, because after set_index only one column is left
# and assigning two column names raises a length-mismatch error.
df = marsTables[0].copy()
df.columns = ["", "Stat"]   # first column (stat labels) gets an empty name
df = df.set_index("")       # use the label column as the index
df.to_html('marsTable.html')

In [12]:
# Open the USGS search results for enhanced Mars hemispheres and collect every
# <div> with the 'item' class (one per hemisphere listing).
browser.visit("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")
html = browser.html
soup = BeautifulSoup(html, features='lxml')
hemis = soup.find_all(name='div', attrs={'class': 'item'})
hemis

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiapa

In [23]:
# Walk the hemisphere listings: open each one's detail page and record its
# heading text together with the absolute URL of the 'wide-image' picture.
usgsRoot = 'https://astrogeology.usgs.gov'
hemiPics = []
for hemi in hemis:
    link = hemi.find('a', attrs={'class': 'itemLink product-item'})
    url = usgsRoot + link['href']
    name = hemi.find('h3').text
    browser.visit(url)

    # parse the detail page that the browser just loaded
    html = browser.html
    picSoup = BeautifulSoup(html, 'lxml')
    fullPic = picSoup.find('img', attrs={'class': 'wide-image'})
    hemiPics.append({'name': name, 'img_url': usgsRoot + fullPic['src']})
hemiPics

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'name': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'name': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'name': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'name': 'Valles Marineris Hemisphere Enhanced'}]