In [1]:
# import dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
executable_path = {'executable_path': '../chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# saving urls of all sites to be scraped
news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
weather_url = 'https://twitter.com/marswxreport?lang=en'
facts_url = 'https://space-facts.com/mars/'
astro_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

### NASA Mars News

In [6]:
browser.visit(news_url)
news_soup = bs(browser.html, 'html.parser')
news_title = news_soup.find_all('div', class_='content_title')[0].text
news_p = news_soup.find_all('div', class_='article_teaser_body')[0].text
print(news_title)
print(news_p)

Small Satellite Mission of the Year
The first interplanetary CubeSats were recognized by the engineering community with the 2019 Small Satellite Mission of the Year award.


### JPL Mars Space Images - Featured Image

In [8]:
# find featured_image_url
image_base_url = 'https://www.jpl.nasa.gov'
browser.visit(images_url)
browser.click_link_by_partial_text('FULL IMAGE')
image_soup = bs(browser.html, 'html.parser')
img = image_soup.find('img', class_='fancybox-image')['src']

# combining base url with incomplete url from scraped featured img src
featured_image_url = image_base_url + img
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19382_ip.jpg'

### Mars Weather

In [9]:
# scrape latest weather tweet
browser.visit(weather_url)
weather_soup = bs(browser.html, 'html.parser')
latest_weather_tweet = weather_soup.find('p', class_='TweetTextSize').text
latest_weather_tweet

'InSight sol 250 (2019-08-10) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.1ºF)\nwinds from the SSE at 4.4 m/s (9.8 mph) gusting to 16.2 m/s (36.2 mph)\npressure at 7.60 hPapic.twitter.com/9sZRRUi3dm'

### Mars Facts

In [10]:
# reading html into a dataframe
facts_table = pd.read_html(facts_url)
mars_table = facts_table[1]

# renaming columns
facts_mapping = {0:'Specifications', 1:'Measurements'}
mars_table = mars_table.rename(columns=facts_mapping)

# saving as html table format
mars_table.to_html('mars_table.html', index=False)

### Mars Hemispheres

In [11]:
# scrape hemisphere images
browser.visit(astro_url)
astro_soup = bs(browser.html, 'html.parser')
astro_base_url = "https://astrogeology.usgs.gov"

hemi_image_list = []

title_list = []
url_list = []

div_list = astro_soup.find_all('a', class_='itemLink')
for link in div_list:
    image_url = link.get('href')
    if image_url not in hemi_image_list:
        hemi_image_list.append(image_url)
        browser.visit(astro_base_url + image_url)
        title_list.append(browser.find_by_tag('h2').text)
        browser.find_link_by_text('Sample').click()
        
for i in range(4,0,-1):
    url_list.append(browser.windows[i].url)

In [12]:
featured_hemisphere_list = [{'title': title_list[0], 'img_url': url_list[0]},
                            {'title': title_list[1], 'img_url': url_list[1]},
                            {'title': title_list[2], 'img_url': url_list[2]},
                            {'title': title_list[3], 'img_url': url_list[3]}]
featured_hemisphere_list

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]