In [1]:
# Import dependencies for scraping: pandas, beautiful soup, requests/splinter
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324






[WDM] - Driver [C:\Users\linds\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache


In [14]:
# Nasa Mars News
# Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
# URL info
nasa_mars_url = "https://mars.nasa.gov/news"
browser.visit(nasa_mars_url)
html = browser.html
soup = bs(html,'html.parser')

# Grab latest news title and paragraph text. Assign the text to variables that you can reference later.
# Title: after inspecting url found news title with "content_title" class 
news_title = soup.find_all('div', class_='content_title')[1].text

# Paragraph: after inspecting url found summary paragraph on website within article_teaser_body

news_p = soup.find_all('div', class_='article_teaser_body')[1].text

# Print
print("Most recent title from mars.nasa.gov/news:")
print("------------------------------------------")
print(news_title)
print("------------------------------------------")
print("Corresponding first paragraph:")
print("------------------------------------------")
print(news_p)

# note: if an index error appears try to run this cell again.

Most recent title from mars.nasa.gov/news:
------------------------------------------
Testing Proves Its Worth With Successful Mars Parachute Deployment
------------------------------------------
Corresponding first paragraph:
------------------------------------------
A 360-degree panorama taken by the rover’s Mastcam-Z instrument will be discussed during a public video chat this Thursday.


In [19]:
# JPL Mars Space Images - Featured Image
# Visit the url for JPL Featured Space Image here https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html

# first go to the website
jpl_url = "https://www.jpl.nasa.gov/images"
browser.visit(jpl_url)
html = browser.html
soup = bs(html,'html.parser')

# grab the featured image by using beautiful soup find_all 'img' and 'src'
# from a brute force approach, index 2 was found to correspond to the appropriate image.
featured_image_url = soup.find_all('img')[2]['src']

print("Below is the url corresponding to the JPL featured image from the website provided for this assignment:")
print("------------------------------------------")
print(featured_image_url)
print("------------------------------------------")

Below is the url corresponding to the JPL featured image from the website provided for this assignment:
------------------------------------------
https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA23727.2e16d0ba.fill-400x400-c50.jpg
------------------------------------------


In [8]:
# Mars Facts
# Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, 
# Mass, etc.

mars_facts_url = "https://space-facts.com/mars/"
browser.visit(mars_facts_url)

# read table and create data frame with new column ids and reset index
mars_table = pd.read_html(mars_facts_url)
mars_facts = mars_table[0]
mars_facts = mars_facts.rename(columns = {0:"Description", 1: "Value"})
mars_facts.set_index("Description",inplace=True)

# Use Pandas to convert the data to a HTML table string.
mars_facts_html = mars_facts.to_html()
mars_facts_html.replace('\n','')
print(mars_facts_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [22]:
# Mars Hemispheres
# Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres.

# first go to the website
mars_hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(mars_hemisphere_url)
html = browser.html
soup = bs(html,'html.parser')

# grab each hemisphere image by going through url, div class "collapsible results" highlights all hemispheres (items)
mars_hemispheres = soup.find('div',class_='collapsible results')
mars_item = mars_hemispheres.find_all('div',class_='item')

# initialize hemisphere image url list
hemisphere_image_urls = []

# loop through each item to get title and image url
for item in mars_item:
    # error handling
    try:
        # grab title in h3 text of div class "description"
        hemisphere = item.find('div', class_='description')
        title = hemisphere.h3.text
        
        # grab image url in <a href a\>
        # go to hemisphere url
        hemisphere_url = hemisphere.a['href']
        browser.visit('https://astrogeology.usgs.gov' + hemisphere_url)
        html = browser.html
        soup = bs(html,'html.parser')
        image_url = soup.find('li').a['href']
        
        if (title and image_url):
            # print results
            print('-------------------------------------')
            print(title)
            print(image_url)
            
        # create dictionary for title and url
        hemisphere_dict={
            'title': title,
            'image_url': image_url
        }
        hemisphere_image_urls.append(hemisphere_dict)
    except Exception as e:
        print(e)

#hemisphere_image_urls
hemisphere_image_urls

-------------------------------------
Cerberus Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
-------------------------------------
Schiaparelli Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
-------------------------------------
Syrtis Major Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
-------------------------------------
Valles Marineris Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]