In [78]:
#Import dependencies and setup
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from IPython.display import HTML

In [79]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 92.0.4515
Get LATEST driver version for 92.0.4515
Get LATEST driver version for 92.0.4515
Trying to download new driver from https://chromedriver.storage.googleapis.com/92.0.4515.107/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\aleja\.wdm\drivers\chromedriver\win32\92.0.4515.107]


## Step 1 - Scraping

### NASA Mars News

In [3]:
# Connecting to the NASA
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
#Scrape the Mars News Site 
html = browser.html
news_soup = BeautifulSoup(html, "html.parser")
slide_element = news_soup.select_one("ul.item_list li.slide")

In [5]:
# Scrape the Latest News Title
news_title = slide_element.find("div", class_="content_title").get_text()
print(news_title)

Clays, Not Water, Are Likely Source of Mars 'Lakes'


In [6]:
# Scrape the Latest News Paragraph
news_paragraph = slide_element.find("div", class_="article_teaser_body").get_text()
print(news_paragraph)

Three studies published in the past month have cast doubt on the premise of subsurface lakes below the Martian south pole.


### JPL Mars Space Images

In [20]:
# Connecting to the NASA
url = "https://www.jpl.nasa.gov/images?query=Mars"
browser.visit(url)

In [21]:
#Find the image url for the current Featured Mars Image and assign the url string to a variable
full_image_button = browser.find_by_id("SearchListingPageResults")
full_image_button.click()

In [22]:
browser.is_element_present_by_text("Download JPG", wait_time=1)
more_info_element = browser.find_link_by_partial_text("Download JPG")
more_info_element.click()

In [23]:
# Parse Results HTML with BeautifulSoup
html = browser.html
image_soup = BeautifulSoup(html, "html.parser")

In [26]:
#Save a complete url string for this image
for link in image_soup.find_all('img'):
    print(link.get('src'))

https://d2pn8kiwq2w21t.cloudfront.net/original_images/jpegPIA15964.jpg


In [27]:
#save a complete url string for this image
img_url = image_soup.select_one("img").get("src")
img_url

'https://d2pn8kiwq2w21t.cloudfront.net/original_images/jpegPIA15964.jpg'

### Mars Facts




In [63]:
# Use Pandas to scrape the table containing facts 
mars_facts = pd.read_html("https://galaxyfacts-mars.com/")

# Take table for Mars facts
df = mars_facts[1]

# Rename columns and set index
df.columns=['description', 'value']
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [65]:
# Use Pandas to convert the data to a HTML table string.
mars_facts_table = [df.to_html(classes='data table table-borderless', index=False, header=False, border=0)]
mars_facts_table

['<table border="0" class="dataframe data table table-borderless">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>']

### Mars Hemispheres




In [89]:
# Connecting to the NASA visit the astrogeology site
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

In [90]:
#Save Hemisphere title containing the hemisphere name
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

title = []

# Search for the names of all four hemispheres
results = soup.find_all('div', class_="collapsible results")
hemispheres = results[0].find_all('h3')

# Get text and store in list
for name in hemispheres:
    title.append(name.text)

title

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [93]:
# Search for thumb class links
thumb_results = results[0].find_all('a')
thumb_links = []

for thumb in thumb_results:
    
    # Find if the thumb element has an image
    if (thumb.img):
        
        # then grab the attached link
        thumb_url = 'https://astrogeology.usgs.gov/' + thumb['href']
        
        # Append list with links
        thumb_links.append(thumb_url)

thumb_links

['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [97]:
# Find full resolution images

full_images = []

for url in thumb_links:
    
    # Click through each thumb link
    browser.visit(url)
    
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    # Scrape each page for the image path
    results = soup.find_all('img', class_='wide-image')
    image_path = results[0]['src']
    
    # Add the image path to get the full url
    image_link = 'https://astrogeology.usgs.gov/' + image_path
    
    # Append full image links to a list
    full_images.append(image_link)

full_images

['https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']