In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd
import requests
import os
import time

# Step 1 - Scraping

In [2]:
# Pages scraped in this notebook, one per data source
nasa_url = 'https://mars.nasa.gov/news/'
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
mars_facts_url = 'https://space-facts.com/mars/'
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Launch a visible (non-headless) Chrome window driven by splinter.
# The driver binary is passed by its bare file name, 'chromedriver.exe'.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News
Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [13]:
# Visit the NASA Mars News site
browser.visit(nasa_url)

# Wait for the first article teaser before grabbing the HTML. The check returns
# as soon as the element shows up, so a generous timeout costs nothing on a fast
# load, and failing here is clearer than an AttributeError on None further down.
if not browser.is_element_present_by_css("div.article_teaser_body", wait_time=10):
    raise RuntimeError(
        "NASA news page did not render 'div.article_teaser_body' within 10 seconds"
    )

html = browser.html
soup_nasa = bs(html, 'html.parser')

In [4]:
# Headline text taken from the first "content_title" div in the parsed page
title_div = soup_nasa.find("div", class_="content_title")
news_title = title_div.text
news_title

"Things Are Stacking up for NASA's Mars 2020 Spacecraft"

In [5]:
# Teaser paragraph taken from the first "article_teaser_body" div
teaser_div = soup_nasa.find('div', class_='article_teaser_body')
news_p = teaser_div.text
news_p


'As the July 2020 launch date inches closer, the next spacecraft headed to the Red Planet is assembled for more testing.'

### Mars Space Images - Featured Image

In [6]:
# Site root that the image path scraped from the page gets appended to
url1 = 'https://www.jpl.nasa.gov'

# Open the JPL Featured Space Image page
browser.visit(jpl_url)

In [7]:
# Parse the JPL page and build featured_image_url for the current Featured Mars Image.
# The image path is read from the carousel item's inline style attribute:
# it is the text between the first pair of single quotes.
html_jpl = browser.html
soup_jpl = bs(html_jpl, 'html.parser')
carousel_style = soup_jpl.find('article', class_="carousel_item")['style']
url2 = carousel_style.split("'")[1]
featured_image_url = url1 + url2
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17356-1920x1200.jpg'

### Mars Weather
Visit the Mars Weather twitter account and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [8]:
# Load the Mars Weather twitter feed and parse the rendered page
browser.visit(mars_weather_url)
html_mw = browser.html
soup_mw = bs(html_mw, 'html.parser')

# The first paragraph carrying the tweet-text classes is taken as the weather report
tweet_classes = "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"
mars_weather = soup_mw.find("p", class_=tweet_classes).text
mars_weather

'InSight sol 141 (2019-04-20) low -98.3ºC (-144.9ºF) high -19.7ºC (-3.5ºF)\nwinds from the SW at 4.7 m/s (10.6 mph) gusting to 12.9 m/s (28.8 mph)\npressure at 7.40 hPapic.twitter.com/CQr1QQt3cM'

### Mars Facts

In [9]:
# Open the Mars Facts page, then let pandas parse the tables found at that URL.
# The first table is kept and its two positional columns (0 and 1) are
# relabelled 'Fact' and 'Value'.
browser.visit(mars_facts_url)
facts_tables = pd.read_html(browser.url)
table_df = facts_tables[0].rename(columns={0: 'Fact', 1: 'Value'})
table_df

Unnamed: 0,Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Use Pandas to convert the data to a HTML table string.
# to_html() only returns the markup when no output target is passed; given a
# path it writes the file and returns None, so the two steps are done separately.
html_table = table_df.to_html()

# Keep writing a copy of the table to disk as well.
table_df.to_html('mars_facts.html')

html_table

### Mars Hemispheres

- Visit the USGS Astrogeology site to obtain high-resolution images for each of Mars's hemispheres.
- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [11]:
# Load the USGS hemisphere search results and parse the rendered page
browser.visit(usgs_url)
html_hemi = browser.html
soup_hemi = bs(html_hemi, features='html.parser')

In [12]:
# Site root; both the detail-page links and the image paths are appended to it
url = 'https://astrogeology.usgs.gov'

# Every "description" div on the results page contributes the href of its first anchor
description = soup_hemi.find_all("div", class_="description")
links = [div.find('a')['href'] for div in description]

# Visit each detail page in turn and record its title and wide-image URL
hemi_dict = []
for link in links:
    browser.visit(url + link)
    soup = bs(browser.html, 'html.parser')
    title = soup.find("h2", class_="title").text
    img_url = url + soup.find("img", class_="wide-image")['src']
    hemi_dict.append(dict(title=title, img_url=img_url))

hemi_dict

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]