In [9]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests
import time

### NASA Mars News

In [10]:
#Use splinter to navigate to Mars site
executable_path = {'executable_path': 'c:/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

url_news = "https://mars.nasa.gov/news/"
browser.visit(url_news)

#give time for dynamic page to load before python script captures page's html
time.sleep(2)

#Retrieve page html, create BeautifulSoup object and parse w/ html.parser
html_news = browser.html
soup_news = bs(html_news, 'html.parser')

#grab parent slide containing latest news title and paragraph description
title_text = soup_news.find('li', class_='slide')

In [11]:
#Extract news title from parent beautiful soup object
news_title = title_text.h3.text
news_title

'NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet'

In [12]:
#Extract paragraph from parent beautiful soup object
news_p = title_text.a.text
news_p

"The agency's Mars 2020 mission is on its way. It will land at Jezero Crater in about seven months, on Feb. 18, 2021. NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet"

### JPL Mars Space Images - Featured Image

In [13]:
#Use splinter to navigate the site and fnid the image url for the current Featured Mars Image

executable_path = {'executable_path': 'c:/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

#visit URL for JPL Featured Space Image

url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

#navigate to featured image page with large res
browser.click_link_by_partial_text('FULL IMAGE')
browser.click_link_by_partial_text('more info')

#get html on featured image page
html = browser.html
soup_img = bs(html, 'html.parser')

#scrape out url for featured image
image_url = soup_img.find('figure',class_="lede").a['href']

#concatenate image_url extension to JPL base url
base_url = 'https://www.jpl.nasa.gov'
featured_image_url = base_url + image_url
featured_image_url



'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17462_hires.jpg'

### Mars Weather

In [14]:
executable_path = {'executable_path': 'c:/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

url = "https://twitter.com/MarsWxReport?lang=en"
browser.visit(url)

time.sleep(2)

html = browser.html
soup_mars = bs(html, 'html.parser')

tweet_parent = soup_mars.find_all('article', attrs={'role':'article'})

for tweet in tweet_parent[:1]:
    weather_tweet = tweet.find('div', attrs={'data-testid':'tweet'}).\
                          find('div',class_="css-901oao r-jwli3a r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0").\
                          find('span', class_="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0").text

weather_tweet

'InSight sol 595 (2020-07-29) low -91.9ºC (-133.5ºF) high -12.9ºC (8.7ºF)\nwinds from the WNW at 7.0 m/s (15.6 mph) gusting to 18.2 m/s (40.7 mph)\npressure at 7.90 hPa'

### Mars Facts

In [15]:
#url for Mars Facts tables
url = 'https://space-facts.com/mars/'

In [16]:
#read in tabular data from mars space facts website 
mars_tables = pd.read_html(url)

#extract first table with indexing to derive the specific mars facts table we want
mars_profile_df = mars_tables[0]

#rename column headers
mars_profile_df.columns = ['Description','Value']
mars_profile_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
#convert df to html
html_mars_table = mars_profile_df.to_html(index=False)

### Mars Hemispheres

In [18]:
#create list of partial hemisphere names to use as partial text for link clicking in 'for' loop 
#to extract info, add to dict, and append to dictionary list
hemispheres = ['Cerberus','Schiaparelli','Syrtis Major','Valles Marineris']

#list for dictionary of hemisphere titles/img urls
hemisphere_image_urls = []

#use splinter for navigating/retrieving site info
executable_path = {'executable_path': 'c:/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

for hemisphere in hemispheres:
    
    #dict for hemisphere title and image url 
    hemi_dict={}
    
    #browser visit site
    url_hemispheres = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(url_hemispheres)    
    
    #click link to go to each hemispheres page with larger image url and full hemisphere title
    browser.click_link_by_partial_text(hemisphere)
    
    #sleep to allow full page load and all html to be captured
    time.sleep(2)
    
    #create beautiful soup object to then parse
    html = browser.html
    soup_hemi = bs(html, 'html.parser')
    
    #retrieve hemisphere url and title from their respective tags
    hemi_image_url = soup_hemi.find('li').a['href']
    hemi_title = soup_hemi.find('h2', class_="title").text
    
    #add info to dictionary
    hemi_dict['title'] = hemi_title
    hemi_dict['img_url'] = hemi_image_url
    
    #append dictionary to list of titles/urls dicts
    hemisphere_image_urls.append(hemi_dict)
    
    
hemisphere_image_urls



[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]