In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import time

In [2]:
# Open a Chrome-driven splinter session; every scraping cell in this notebook
# navigates with this single `browser` object.
browser = Browser(driver_name='chrome')

# Collects each scraped result under a descriptive key.
scraped_data = dict()

In [3]:
# Scrape Mars News
browser.visit("https://mars.nasa.gov/news/")
# Poll (up to 10 s) for the first article entry before snapshotting the HTML,
# in case the article list is filled in after the initial page load. The
# boolean result is intentionally ignored: a missing list is handled below.
browser.is_element_present_by_css('li.slide', wait_time=10)
soup = bs(browser.html, 'html.parser')

# Default both fields to None so the keys always exist in scraped_data and a
# re-run can never leave stale values behind when nothing is found.
news_title = None
news_paragraph = None

# Only the first (most recent) article is needed.
first_slide = soup.find('li', class_='slide')
if first_slide is not None:
    title_div = first_slide.find('div', class_='content_title')
    teaser_div = first_slide.find('div', class_='article_teaser_body')
    # Guard each lookup: find() returns None when the element is absent.
    if title_div is not None:
        news_title = title_div.text
    if teaser_div is not None:
        news_paragraph = teaser_div.text

scraped_data['news_title'] = news_title
scraped_data['news_paragraph'] = news_paragraph

scraped_data

{'news_title': "Things Are Stacking up for NASA's Mars 2020 Spacecraft",
 'news_paragraph': 'As the July 2020 launch date inches closer, the next spacecraft headed to the Red Planet is assembled for more testing.'}

In [4]:
# Scrape Featured Image
browser.visit('https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')
browser.click_link_by_id("full_image")

# The 'more info' link may not be clickable right after the click above, so
# try once after a short pause and once more after a longer one. A failure on
# the second attempt propagates to the caller.
time.sleep(0.5)
try:
    browser.find_link_by_partial_text('more info')[0].click()
except Exception:
    # Catch Exception rather than using a bare except, so Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed by the retry.
    time.sleep(2)
    browser.find_link_by_partial_text('more info')[0].click()

# Poll (up to 5 s) for the detail page's main image before snapshotting the
# HTML, so the snapshot is not taken while the previous page is still shown.
browser.is_element_present_by_css('img.main_image', wait_time=5)
soup = bs(browser.html, 'html.parser')

main_image = soup.find('img', class_='main_image')
if main_image is None:
    # Fail with a clear message instead of "'NoneType' is not subscriptable".
    raise RuntimeError("Featured image not found: no <img class='main_image'> on " + browser.url)

# The src attribute is site-relative, so prefix the host to get an absolute URL.
featured_image_url = 'https://www.jpl.nasa.gov' + main_image['src']
scraped_data['featured_image_url'] = featured_image_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17448_hires.jpg'

In [5]:
# Scrape Tweeted Mars Weather
browser.visit('https://twitter.com/marswxreport?lang=en')
soup = bs(browser.html, 'html.parser')

# Stays '' when no tweet authored by "Mars Weather" is found.
Mars_Weather = ''
for div in soup.find_all('div', class_='tweet'):
    header_div = div.find('div', class_='stream-item-header')
    text_p = div.find('p', class_='TweetTextSize')
    # Skip tweets missing the expected header/text markup instead of
    # crashing on None; a later tweet may still be usable.
    if header_div is None or text_p is None or not text_p.contents:
        continue
    tweet_header = header_div.text.lstrip()
    # Only tweets whose header starts with the account name are weather
    # reports from this account (others, e.g. retweets, are ignored).
    if tweet_header.startswith("Mars Weather"):
        # Take only the first child of the paragraph and convert it to a
        # plain str: a NavigableString keeps a reference to the whole parse
        # tree, which is wasteful and awkward to serialize.
        Mars_Weather = str(text_p.contents[0])
        break

scraped_data['Mars_Weather'] = Mars_Weather
Mars_Weather

'InSight sol 141 (2019-04-20) low -98.3ºC (-144.9ºF) high -19.7ºC (-3.5ºF)\nwinds from the SW at 4.7 m/s (10.6 mph) gusting to 12.9 m/s (28.8 mph)\npressure at 7.40 hPa'

In [6]:
# Scrape Mars Facts
mars_facts_url = 'https://space-facts.com/mars/'

# read_html returns one DataFrame per <table> on the page; the first is used.
tables = pd.read_html(mars_facts_url)
df = tables[0]
df.columns = ['Description', "Value"]

# Render the table without header or index, then drop every newline so the
# markup is a single line.
facts_html = df.to_html(header=False, index=False)
facts_table = ''.join(facts_html.split('\n'))
scraped_data['facts_table'] = facts_table

# Preview: every row except the last one.
df.head(-1)

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC


In [7]:
# Scrape Hemispheres Images
browser.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')

# First pass: gather (title, detail-page URL) for every search result while
# still on the results page, before any navigation away from it.
result_links = [entry.find_by_tag('a')[0] for entry in browser.find_by_css('.description')]
urls = [(link.value, link['href']) for link in result_links]

# Second pass: open each detail page and read the src of its '.wide-image'.
hemisphere_image_urls = []
for title, url in urls:
    browser.visit(url)
    wide_image = browser.find_by_css('.wide-image')[0]
    img_url = wide_image['src']
    hemisphere_image_urls.append(dict(title=title, img_url=img_url))

scraped_data['hemispheres'] = hemisphere_image_urls
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]