In [13]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pandas as pd
import os

In [94]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [/Users/yankori/.wdm/drivers/chromedriver/mac64/93.0.4577.63/chromedriver] found in cache


In [16]:
# URL to be scraped
url = 'https://redplanetscience.com/'
browser.visit(url)

In [17]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

### NASA Mars News

In [18]:
# Collect the latest News Title
results = soup.find_all('div', class_="content_title")
news_title = results[0].text
news_title

'NASA Wins Two Emmy Awards for Interactive Mission Coverage'

In [19]:
# Collect the latest Paragraph Text
results = soup.find_all('div', class_="article_teaser_body")
news_p = results[0].text
news_p

"NASA-JPL's coverage of the Mars InSight landing earns one of the two wins, making this the NASA center's second Emmy."

### JPL Mars Space Images - Featured Image

In [58]:
# Use splinter to navigate the site
base_url = 'https://spaceimages-mars.com/'
browser.visit(base_url)

In [59]:
# Click the full image button
full_img = browser.find_by_tag('button')[1]
full_img.click()

In [64]:
# Parse the html with soup
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

In [65]:
# Find the relative image url
img_url = img_soup.find("img", class_ = "fancybox-image").get('src')
img_url

'image/featured/mars2.jpg'

In [66]:
featured_image_url = base_url + img_url
featured_image_url

'https://spaceimages-mars.com/image/featured/mars2.jpg'

### Mars Facts

In [75]:
# Use Pandas to scrape the table containing facts about Mars from Mars Facts webpage
mars_facts_url = 'https://galaxyfacts-mars.com/'
tables = pd.read_html(mars_facts_url)
facts_df = tables[1]

In [76]:
# Rename the table columns and set the Parameter as index
facts_df.columns = ["Parameter", "Value"]
facts_df.set_index("Parameter", inplace = True)
facts_df

Unnamed: 0_level_0,Value
Parameter,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [78]:
# Use Pandas to convert the data to a HTML table string
html_table = mars_df.to_html()
html_table.replace('\n', '')
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>0</th>
      <th>1</th>
      <th>2</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Mars - Earth Comparison</td>
      <td>Mars</td>
      <td>Earth</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Diameter:</td>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Distance from Sun:</td>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Length of Year:</td>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Temperature:</td>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody

### Mars Hemispheres

In [95]:
# Use the browser to visit the astrogeology url
hemispheres_url = "https://marshemispheres.com/"
browser.visit(hemispheres_url)

In [96]:
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [97]:
items = soup.find_all("div", class_="item")

In [98]:
hemisphere_img_urls = []
for item in items:
    title = item.find("h3").text
    hemisphere_url = "https://marshemispheres.com/" + item.find("a", class_="itemLink product-item")["href"]
    
    browser.visit(hemisphere_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    hemisphere_img_url = "https://marshemispheres.com/" + soup.find("img", class_="wide-image")["src"]
    hemisphere_img_urls.append({"title": title, "img_url": hemisphere_img_url})
hemisphere_img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]