In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests
import time


In [2]:
def init_browser(executable_path="c:/bin/chromedriver", headless=False):
    """Start a Chrome browser driven through splinter.

    Parameters
    ----------
    executable_path : str, optional
        Location of the chromedriver binary. The default is the path this
        notebook was originally written against; pass a different value on
        machines where the driver lives elsewhere.
    headless : bool, optional
        Forwarded unchanged to ``Browser``. Defaults to ``False``.

    Returns
    -------
    The object returned by ``Browser("chrome", ...)``.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

In [3]:
browser = init_browser()

# scrape the NASA Mars News Site
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)

# Pause BEFORE reading the page source. The string taken from browser.html is
# what gets parsed below, so a pause placed after that read cannot change the
# parsed content (presumably the wait is meant to let the page finish loading).
time.sleep(10)
html = browser.html

# create beautiful soup object; parse with html.parser
soup = bs(html, 'html.parser')

In [4]:
# Take the first `list_text` block on the parsed page (assumed to be the most
# recent article) and read its headline and teaser text.
article = soup.find('div', class_='list_text')
title = article.find('div', class_="content_title").text
paragraph = article.find('div', class_="article_teaser_body").text

# Echo both values, one per line, for a quick visual check.
print(title, paragraph, sep='\n')

NASA Readies Perseverance Mars Rover's Earthly Twin 
Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


In [5]:
# Open the JPL Mars image gallery and pause briefly before interacting with it.
JPL_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(JPL_url)
time.sleep(2)

# Click the link located by this absolute XPath to reach the full image view.
full_image_link = browser.find_by_xpath(
    '/html/body/div/div/div[3]/section[1]/div/div/article/div[1]/footer/a'
)
full_image_link.click()

# Then follow the 'more info' link.
browser.click_link_by_partial_text('more info')




In [6]:
# Read the markup of the page the browser is currently on and parse it.
html = browser.html
featured_soup = bs(html, 'html.parser')

# The image path is the href of the first link inside <figure class="lede">;
# prefixing the site root turns it into an absolute URL.
lede_figure = featured_soup.find('figure', class_='lede')
img = lede_figure.a['href']
featured_image_url = f'https://www.jpl.nasa.gov{img}'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18907_hires.jpg


In [7]:
# use pandas to scrape mars facts re: diameter, mass, etc.
# Keep the page address in a variable and open it in the browser session.
facts_url = 'https://space-facts.com/mars/'
browser.visit(facts_url)

In [18]:
# get tables from the webpage
tables = pd.read_html(facts_url)

# Work on a copy of the first table so the parsed list held in `tables`
# is not modified by the relabelling below.
mars_df = tables[0].copy()

# Label the two columns; this raises if the table does not have exactly two.
mars_df.columns = ['Description', 'Mars']

# set the first column as the index (reassigned rather than mutated in place)
mars_df = mars_df.set_index('Description')

mars_df.head()


Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"


In [10]:
# convert data to html table
html_mars_table = mars_df.to_html()

# clean up html table formatting: str.replace returns a new string, so the
# result must be assigned back or html_mars_table keeps its newlines
html_mars_table = html_mars_table.replace('\n', '')

# show the cleaned markup as the cell output
html_mars_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>1</th>    </tr>    <tr>      <th>0</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [11]:
# Gather the high resolution image URL for each Mars hemisphere, starting
# from the USGS Astrogeology search results page.
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)

# Cerberus: follow its link from the results page, then parse the page the
# browser lands on.
browser.click_link_by_partial_text('Cerberus')
html = browser.html
hem_soup = bs(html, 'html.parser')

# The image path is the src of the first <img class="wide-image">; prefix the
# site root to make it absolute.
cerb_tag = hem_soup.find('img', class_='wide-image')
cerb_img = cerb_tag['src']
cerb_url = f'https://astrogeology.usgs.gov{cerb_img}'

# print to verify
print(cerb_url)

# Step back in the browser history so the next hemisphere link can be clicked.
browser.back()



https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg


In [12]:
# schiaparelli hemisphere

# Follow the Schiaparelli link, then parse the page the browser lands on.
browser.click_link_by_partial_text('Schiaparelli')
html = browser.html
hem_soup = bs(html, 'html.parser')

# Build the Schiaparelli image URL from the src of the first
# <img class="wide-image">, prefixed with the site root.
schiap_tag = hem_soup.find('img', class_='wide-image')
schiap_img = schiap_tag['src']
schiap_url = f'https://astrogeology.usgs.gov{schiap_img}'

# print to verify
print(schiap_url)

https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg


In [13]:
# syrtis major hemisphere

# Step back in the browser history before choosing the next hemisphere.
browser.back()

# Follow the Syrtis Major link, then parse the page the browser lands on.
browser.click_link_by_partial_text('Syrtis Major')
html = browser.html
hem_soup = bs(html, 'html.parser')

# Build the Syrtis Major image URL from the src of the first
# <img class="wide-image">, prefixed with the site root.
sm_tag = hem_soup.find('img', class_='wide-image')
sm_img = sm_tag['src']
sm_url = f'https://astrogeology.usgs.gov{sm_img}'

# print to verify
print(sm_url)


https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg


In [14]:
# valles marineris hemisphere

# Step back in the browser history before choosing the next hemisphere.
browser.back()

# Follow the Valles Marineris link, then parse the page the browser lands on.
browser.click_link_by_partial_text('Valles')
html = browser.html
hem_soup = bs(html, 'html.parser')

# Build the Valles Marineris image URL from the src of the first
# <img class="wide-image">, prefixed with the site root.
vm_tag = hem_soup.find('img', class_='wide-image')
vm_img = vm_tag['src']
vm_url = f'https://astrogeology.usgs.gov{vm_img}'

# print to verify
print(vm_url)


https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg


In [15]:
# Pair each hemisphere title with its image URL. The result is a list of
# dictionaries, each with the keys "title" and "img_url".
hemisphere_image_urls = [
    {"title": hemisphere_title, "img_url": hemisphere_img_url}
    for hemisphere_title, hemisphere_img_url in (
        ("Valles Marineris Hemisphere", vm_url),
        ("Cerberus Hemisphere", cerb_url),
        ("Schiaparelli Hemisphere", schiap_url),
        ("Syrtis Major Hemisphere", sm_url),
    )
]