In [68]:
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pandas as pd

### NASA Mars Article

In [69]:
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [70]:
# Parse the page for the "content_title" element
news_title = soup.find('div', class_="content_title").text.strip()

news_title

"NASA's Perseverance Drives on Mars' Terrain for First Time"

In [71]:
# Parse the page for the "article_teaser_body" element
news_paragraph = soup.find('div', class_="article_teaser_body")
news_p = soup.find('div', class_='text')
print(news_paragraph)

None


### JPL Mars Space Images

In [72]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

imageurl="https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
    
browser.visit(imageurl)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [/Users/kristenlega/.wdm/drivers/chromedriver/mac64/88.0.4324.96/chromedriver] found in cache






In [35]:
html = browser.html
imgsoup = BeautifulSoup(html, 'html.parser')

In [36]:
short_url = imgsoup.find('img', class_="headerimage fade-in")
short_url_clean = short_url['src']
featured_image_url = f'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/{short_url_clean}'
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg


### Mars Facts

In [37]:
factsurl="https://space-facts.com/mars/"

In [38]:
factstable = pd.read_html(factsurl)
factstable[0].columns = ['Description','Mars']
factstable[0].set_index("Description",inplace=True)
factstable[0]

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [39]:
factstable_html = factstable[0].to_html()
factstable_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

In [42]:
factstable_html.replace('\n','')
print(factstable_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [50]:
hemi_url="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    
browser.visit(hemi_url)
html = browser.html
hemisoup = BeautifulSoup(html, 'html.parser')

In [59]:
hemi_results = hemisoup.find("div", class_="collapsible results")
hemi_items = hemi_results.find_all("div", class_="item")
hemi_items[0]

'<div class="item">\n <a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced">\n  <img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/>\n </a>\n <div class="description">\n  <a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced">\n   <h3>\n    Cerberus Hemisphere Enhanced\n   </h3>\n  </a>\n  <span class="subtitle" style="float:left">\n   image/tiff 21 MB\n  </span>\n  <span class="pubDate" style="float:right">\n  </span>\n  <br/>\n  <p>\n   Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…\n  </p>\n </div>\n <!-- end description -->\n</div>\n'

In [66]:
hemisphere_image_urls = []

# loop over results to get titles and urls
for item in hemi_items:
    # scrape the image title
    description = item.find('div', class_='description')
    title = description.find('h3').text
    
    # scrape the image url
    hemi_page_url_short = item.a['href']
    
    browser.visit(f'https://astrogeology.usgs.gov{hemi_page_url_short}')
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    downloads = soup.find("div", class_="downloads")
    img_url = downloads.find('li').a['href']
    
    # Create Dictionary Element
    hemi_dict = {
        'title': title,
        'img_url': img_url,
    }

    # Append to list
    hemisphere_image_urls.append(hemi_dict)

In [67]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [77]:
final_dict={
    "news_title": news_title,
    "news_p": news_p,
    "featured_image_url":featured_image_url,
    "factstable_html":factstable_html,
    "hemisphere_images":hemisphere_image_urls
}

In [78]:
final_dict

{'news_title': "NASA's Perseverance Drives on Mars' Terrain for First Time",
 'news_p': None,
 'featured_image_url': 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg',
 'factstable_html': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperatur