In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Launch a visible (non-headless) Chrome session driven by the chromedriver binary
# expected next to this notebook
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## Latest News

In [3]:
# Define the NASA Mars news listing URL and direct the chromedriver-controlled browser to it
# NOTE(review): nothing here waits for the page to finish rendering before the next
# cell reads browser.html -- confirm the news list is present when scraped
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# Parse the news page source currently held by the browser using Python's html.parser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Parallel lists for the scraped fields: position i in each list describes the same news item
titles = []
paras = []
lnks = []

# Walk every news entry on the page and record its headline, teaser text, and link
for item in soup.find_all('div', class_="list_text"):
    headline = item.find('div', class_="content_title")
    teaser = item.find('div', class_="article_teaser_body")
    # item.a is the first anchor inside the entry; its href is a path relative to the site root
    href = item.a['href']
    # Append only after all three lookups succeeded so the lists stay the same length
    titles.append(headline.text)
    paras.append(teaser.text)
    lnks.append(href)

In [6]:
# Build one record (dict) per news item from the parallel lists,
# turning each site-relative link into an absolute URL
news = [
    {'title': headline, 'text': teaser, '_url': "https://mars.nasa.gov" + path}
    for headline, teaser, path in zip(titles, paras, lnks)
]

In [7]:
# Preview the scraped news records (a list of dicts with 'title', 'text', and '_url' keys)
news

[{'title': 'InSight Captures Sunrise and Sunset on Mars',
  'text': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.",
  '_url': 'https://mars.nasa.gov/news/8432/insight-captures-sunrise-and-sunset-on-mars/'},
 {'title': 'NASA Social Media and Websites Win Webby Awards ',
  'text': 'NASA\'s social media presence, the InSight mission social media accounts, NASA.gov and SolarSystem.NASA.gov will be honored at the 2019 Webby Awards - "the Oscars of the Internet."',
  '_url': 'https://mars.nasa.gov/news/8431/nasa-social-media-and-websites-win-webby-awards/'},
 {'title': "NASA's InSight Detects First Likely 'Quake' on Mars",
  'text': 'While their causes are still unknown, one of three shaking events looks a lot like the quakes detected on the Moon by the Apollo missions.',
  '_url': 'https://mars.nasa.gov/news/8430/nasas-insight-detects-first-likely-quake-on-mars/'},
 {'title': "Things Are Stacking up fo

## Featured Image

In [8]:
# Define the JPL space images URL (search for featured Mars images) and direct chromedriver to it
url = "https://www.jpl.nasa.gov/spaceimages/?search=mars&category=featured#submit/"
browser.visit(url)

In [9]:
# Parse the JPL space images page source currently held by the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [10]:
# Scrape one specific, hand-picked image to use as the jumbotron background.
# NOTE(review): the lookup is keyed on a hard-coded image title; if no element on the
# page carries that data-title, the [0] below raises IndexError.
bg_title = "A Fresh Crater near Sirenum Fossae"
bg_matches = soup.find_all(attrs={"data-title": bg_title})
bgImage = bg_matches[0]['data-fancybox-href']
bg_url = "https://www.jpl.nasa.gov" + bgImage

In [11]:
# Scrape the first featured Mars image from the carousel
image = soup.find_all(class_="carousel_item")[0]['style']
# The path sits inside the inline style between (' and '); cut out exactly that span
after_open_paren = image.split("('", 1)[1]
relative_path = after_open_paren.split("')")[0]
featured_image_url = "https://www.jpl.nasa.gov" + relative_path

In [12]:
# Display the assembled featured image URL to verify the scrape
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA10181-1920x1200.jpg'

## Mars Weather

In [13]:
# Define the MarsWxReport Twitter timeline URL and direct chromedriver to it
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

In [14]:
# Parse the Twitter timeline page source currently held by the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [15]:
# Collect every <div class="content"> element; each one is treated as a tweet downstream
tweets = soup.find_all('div', attrs={'class': 'content'})

In [16]:
# Keep only MarsWxReport's own tweets (first anchor links to "/MarsWxReport"), which
# eliminates retweets. A content div with no anchor at all (tw.a is None), or whose
# first anchor lacks an href, is skipped instead of aborting the cell with an exception.
marsWx_tweets = [
    tw for tw in tweets
    if tw.a is not None and tw.a.get('href') == "/MarsWxReport"
]

In [17]:
# Scrape the text of the first MarsWxReport tweet in the filtered list
tweet_body = marsWx_tweets[0].find('p', class_='tweet-text')
wx = tweet_body.text

# Remove the picture link text if the tweet ends with one.
# str.strip(chars) treats its argument as a set of characters to trim from BOTH ends,
# not as a substring, so it can also eat genuine letters of the weather report.
# Slice off the exact trailing link text instead.
# A tweet with no link has no anchor (tweet_body.a is None) and is kept unchanged.
pic_link = tweet_body.a
pic_text = pic_link.text if pic_link is not None else ''
if pic_text and wx.endswith(pic_text):
    wx = wx[:-len(pic_text)]

In [18]:
# Flatten the report onto one line: every newline becomes a single space
mars_weather = ' '.join(wx.split('\n'))

In [19]:
# Display the cleaned, single-line weather report to verify the scrape
mars_weather

'InSight sol 152 (2019-05-01) low -98.1ºC (-144.5ºF) high -17.2ºC (1.0ºF) winds from the SW at 4.8 m/s (10.7 mph) gusting to 13.2 m/s (29.5 mph) pressure at 7.40 hPa'

## Mars Facts

In [20]:
# Define the space-facts.com Mars page URL and direct chromedriver to it
url = "https://space-facts.com/mars/"
browser.visit(url)

In [21]:
# Parse the Mars facts page source currently held by the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [22]:
# Locate the facts table by its id and keep the first match
matching_tables = soup.find_all('table', id="tablepress-mars")
table = matching_tables[0]

In [23]:
# Serialize the parsed table element back into its HTML markup string
table_str = str(table)

In [24]:
# Drop every newline so the table markup is one continuous line
table_str = ''.join(table_str.split('\n'))

In [25]:
# Append extra class names to the table's class attribute.
# The search text deliberately omits the closing quote so the added names land
# inside the existing attribute value.
old_class_attr = '''class="tablepress tablepress-id-mars'''
new_class_attr = '''class="tablepress tablepress-id-mars table-hover table-light table-borderless'''
html_table = table_str.replace(old_class_attr, new_class_attr)

In [26]:
# Display the final table HTML string to verify the class substitution
html_table

'<table class="tablepress tablepress-id-mars table-hover table-light table-borderless" id="tablepress-mars"><tbody><tr class="row-1 odd"><td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.42 x 10^23 kg (10.7% Earth)</td></tr><tr class="row-4 even"><td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/deimos/">Deimos</a>)</td></tr><tr class="row-5 odd"><td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km (1.52 AU)</td></tr><tr class="row-6 even"><td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/></td></tr><tr class="ro

## Mars Hemisphere

In [27]:
# Define the USGS Astrogeology search URL (query "hemisphere enhanced", target Mars)
# and direct chromedriver to it
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/"
browser.visit(url)

In [28]:
# Parse the USGS search results page source currently held by the browser
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [29]:
# Collect every <div class="item"> result; each holds the link to a hemisphere's detail page
images_lnks = soup.find_all('div', attrs={'class': 'item'})

In [30]:
# Pull the href of the first anchor in each result item (the detail-page link)
hrefs = [item.a['href'] for item in images_lnks]

In [31]:
# Visit each hemisphere's detail page, read the full-size image src, and store the absolute URL.
# NOTE: this loop rebinds the notebook-level names url, html, and soup on every pass.
base_url = 'https://astrogeology.usgs.gov'
image_urls = []
for endpoint in hrefs:
    # Join with exactly one slash whether or not the scraped href is root-relative;
    # concatenating onto a base that already ends in "/" yields "//" for hrefs starting with "/"
    url = base_url + '/' + endpoint.lstrip('/')
    browser.visit(url)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Same single-slash join for the image path, so both URLs are built consistently
    src = soup.find('img', class_='wide-image')['src']
    image_urls.append(base_url + '/' + src.lstrip('/'))


In [32]:
# Pair each result item's <h3> title with the image URL collected for it,
# producing one record (dict) per hemisphere
hemisphere_image_urls = [
    {'title': item.h3.text, 'img_url': img_url}
    for item, img_url in zip(images_lnks, image_urls)
]

In [33]:
# Display the hemisphere records (title + full-size image URL) to verify the scrape
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [34]:
# Quit chromedriver, shutting down the browser session started at the top of the notebook
browser.quit()