In [1]:
# import dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import time
import pandas as pd

In [2]:
# start a visible (non-headless) Chrome session through splinter, pointing it
# at the chromedriver.exe executable
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

In [3]:
# navigate to NASA's Mars Exploration Program news site
url_news = 'https://mars.nasa.gov/news'
browser.visit(url_news)

# fixed 2-second pause before the next cell reads the page
# (presumably gives the page time to finish rendering -- TODO confirm it is enough)
time.sleep(2)

In [4]:
# scrape page: snapshot the browser's current HTML and parse it with
# BeautifulSoup using Python's built-in html.parser
html_news = browser.html
soup_news = bs(html_news, 'html.parser')

# NOTE(review): this pause runs after the HTML snapshot above was already
# taken, so it cannot affect what ends up in soup_news -- confirm it is needed
time.sleep(2)

# debugging aid: uncomment to dump the parsed document
# print(soup_news.prettify())

In [5]:
# find latest news title: text of the first <div class="content_title"> in the
# parsed news page (assumes the page lists the newest article first)
title_divs = soup_news.find_all('div', class_='content_title')
news_title = title_divs[0].text
news_title

"NASA's Mars 2020 Rover Completes Its First Drive"

In [6]:
# find corresponding paragraph text: text of the first
# <div class="article_teaser_body"> in the parsed news page
teaser_divs = soup_news.find_all('div', class_='article_teaser_body')
news_p = teaser_divs[0].text
news_p

'In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.'

### JPL Mars Space Images - Featured Image

In [7]:
# visit JPL Featured Space Image page (URL is pre-filtered to category=Mars)
url_image = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_image)

# fixed 2-second pause before the next cell starts clicking
# (presumably gives the page time to finish rendering -- TODO confirm)
time.sleep(2)

In [8]:
# navigate to full image version of featured image
# step 1: click the element whose id is 'full_image'
# NOTE(review): the click_link_by_* helpers may be deprecated in newer
# splinter releases -- check against the installed version
browser.click_link_by_id('full_image')

# fixed 3-second pause before looking for the next link
# (presumably waits for the content opened by the click -- TODO confirm)
time.sleep(3)

# step 2: follow the link whose visible text contains 'more info'
# (alternative kept for reference: match the link by a 'details' href fragment)
# browser.click_link_by_partial_href('details')
browser.click_link_by_partial_text('more info')

# fixed 2-second pause before the next cell snapshots the page
time.sleep(2)

In [9]:
# scrape page: snapshot the browser's current HTML (the page reached by the
# clicks in the previous cell) and parse it with Python's built-in html.parser
html_image = browser.html
soup_image = bs(html_image, 'html.parser')

# debugging aid: uncomment to dump the parsed document
# print(soup_image.prettify())

In [10]:
# find featured image source url: the link inside the second
# <div class="download_tiff"> of the image detail page
download_divs = soup_image.find_all('div', class_='download_tiff')
partial_image_url = download_divs[1].find('a')['href']

# the scraped href carries no scheme, so 'https:' is prepended
featured_image_url = f'https:{partial_image_url}'

featured_image_url

'https://photojournal.jpl.nasa.gov/jpeg/PIA17832.jpg'

### Mars Weather

In [11]:
# visit Mars Weather twitter page
url_weather = 'https://twitter.com/marswxreport'
browser.visit(url_weather)

# fixed 2-second pause before the next cell reads the page
# (presumably gives the timeline time to load -- TODO confirm it is enough)
time.sleep(2)

In [12]:
# scrape page: snapshot the browser's current HTML and parse it with
# BeautifulSoup using Python's built-in html.parser
html_weather = browser.html
soup_weather = bs(html_weather, 'html.parser')

# debugging aid: uncomment to dump the parsed document
# print(soup_weather.prettify())

In [13]:
# find latest weather report
# every <p class="tweet-text"> element in the parsed Twitter page
tweets = soup_weather.find_all('p', class_='tweet-text')

# text of every tweet whose first word is 'InSight', in page order
tweets_list = []

for tweet in tweets:
    full_string = tweet.text
    words = full_string.split()

    # skip tweets with no text at all (indexing their first word would raise
    # IndexError) and tweets that do not start with the word 'InSight'
    if not words or words[0] != 'InSight':
        continue

    # when the tweet contains a link, remove the text of its first link from
    # the report (the original code treats it as the attached-picture URL)
    link = tweet.find('a')
    if link is not None:
        full_string = full_string.replace(link.text, '')

    tweets_list.append(full_string)

if tweets_list:
    # tidy the first matching tweet: join its lines with commas, turn the
    # leading 'InSight s' into 'S', and add a comma before 'low' / 'high'
    mars_weather = (
        tweets_list[0]
        .replace('\n', ', ')
        .replace('InSight s', 'S')
        .replace(') low', '), low')
        .replace(') high', '), high')
    )
else:
    # nothing matched: set an explicit None so this cell never ends in a
    # NameError or silently displays a value left over from an earlier run
    mars_weather = None

mars_weather

'Sol 378 (2019-12-19), low -97.7ºC (-143.8ºF), high -20.0ºC (-3.9ºF), winds from the SSE at 5.7 m/s (12.8 mph) gusting to 23.8 m/s (53.2 mph), pressure at 6.50 hPa'

### Mars Facts

In [14]:
# # visit Mars Facts page
# url_facts = 'https://space-facts.com/mars/'
# browser.visit(url_facts)

# time.sleep(2)

In [15]:
# # scrape page
# html_facts = browser.html
# soup_facts = bs(html_facts, 'html.parser')

# # print(soup_facts.prettify())

In [16]:
# mars_facts_dict = {}

# table = soup_facts.find('table', id='tablepress-p-mars-no-2')
# entries = table.find_all('tr')

# for entry in entries:
#     key = entry.find_all('td')[0].text.replace(':','')
#     value = entry.find_all('td')[1].text
#     mars_facts_dict[key] = value

# mars_facts = pd.DataFrame([mars_facts_dict])
# mars_facts = mars_facts.transpose()
# mars_facts.reset_index(inplace=True)
# mars_facts.columns = ['Description','Value']
# mars_facts.set_index('Description', inplace=True)
# mars_facts

In [17]:
# url for Mars Facts page; handed straight to pandas.read_html in the next
# cell, so the browser session is not used for this section
url_facts = 'https://space-facts.com/mars/'

In [18]:
# create dataframe containing facts
# read_html returns one DataFrame per <table> it finds at the URL
tables = pd.read_html(url_facts)

# work on the first table: label its two columns (this relabels tables[0]
# itself), then build the result indexed by the fact description
facts_table = tables[0]
facts_table.columns = ['Description', 'Value']
mars_facts = facts_table.set_index('Description')
mars_facts

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [19]:
# visit USGS Astrogeology page (search results for 'hemisphere enhanced',
# filtered to target=Mars by the query string)
url_hemisphere = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_hemisphere)

# fixed 2-second pause before the next cell reads the page
# (presumably gives the results time to load -- TODO confirm it is enough)
time.sleep(2)

In [20]:
# scrape page: snapshot the browser's current HTML and parse it with
# BeautifulSoup using Python's built-in html.parser
html_hemisphere = browser.html
soup_hemisphere = bs(html_hemisphere, 'html.parser')

# debugging aid: uncomment to dump the parsed document
# print(soup_hemisphere.prettify())

In [21]:
# find image titles and urls
# each <div class="description"> on the results page describes one hemisphere
hemispheres = soup_hemisphere.find_all('div', class_='description')

# one {'title': ..., 'image_url': ...} dict per hemisphere, in page order
hemisphere_image_urls = []

# hrefs and image paths scraped below are site-relative, so both get this prefix
usgs_base = 'https://astrogeology.usgs.gov'

for hemisphere in hemispheres:

    # the first link in the description holds both the title and the detail-page path
    detail_link = hemisphere.find('a')
    title = detail_link.text
    print(title)

    # open the hemisphere's detail page and wait before reading it
    browser.visit(usgs_base + detail_link['href'])
    time.sleep(2)

    # the full-size picture is the <img class="wide-image"> on the detail page
    soup = bs(browser.html, 'html.parser')
    image_url = usgs_base + soup.find('img', class_='wide-image')['src']
    print(f'{image_url}\n')

    hemisphere_image_urls.append({'title': title, 'image_url': image_url})

    # step back to the results page before handling the next hemisphere
    browser.back()
    time.sleep(2)

hemisphere_image_urls

Cerberus Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg

Schiaparelli Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg

Syrtis Major Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg

Valles Marineris Hemisphere Enhanced
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg



[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]