In [1]:
#import dependencies
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
from splinter import Browser


In [2]:
#initiate splinter
#keyword arguments that point splinter at the chromedriver binary
executable_path = dict(executable_path='chromedriver.exe')
#start chrome in headless mode
browser = Browser('chrome', headless=True, **executable_path)

In [3]:
#scrape NASA Mars News
#set the url to be scraped
nasa_url = 'https://mars.nasa.gov/news/'

#visit the site via splinter
browser.visit(nasa_url)

#give the page up to 10 seconds to show its first headline before the html
#is captured; the article list may not be present right after the visit
#NOTE(review): presumably the list is filled in by JavaScript - confirm
if not browser.is_element_present_by_css('div.content_title', wait_time=10):
    raise RuntimeError('no news headline found on ' + nasa_url)
nasa_html = browser.html
nasa_soup = bs(nasa_html, 'lxml')

#locate the most recent news title and the teaser associated with it
nasa_title_tag = nasa_soup.find('div', class_='content_title')
nasa_teaser_tag = nasa_soup.find('div', class_='article_teaser_body')

#fail with a clear message instead of an AttributeError on None
if nasa_title_tag is None or nasa_teaser_tag is None:
    raise RuntimeError('news title or teaser missing on ' + nasa_url)

#get the most recent news title
nasa_title = nasa_title_tag.text
print(nasa_title)

#get the teaser associated with that title
nasa_teaser = nasa_teaser_tag.text
print(nasa_teaser)

Mars New Home 'a Large Sandbox'
With InSight safely on the surface of Mars, the mission team is busy learning more about the spacecraft's landing site.


In [4]:
#scrape JPL Mars Space Images for Featured Image
#site root that is prepended to the links read from the pages below
jpl_base = 'https://www.jpl.nasa.gov'

#set the url to be scraped
jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

#visit the site via splinter and parse what the browser rendered
browser.visit(jpl_url)
jpl_html = browser.html
jpl_soup = bs(jpl_html, 'lxml')

#the featured image button carries the link to the image's detail page
featured_image = jpl_soup.find('a', class_='button fancybox')
jpl_pic_page = jpl_base + featured_image['data-link']

#open the detail page and parse it
browser.visit(jpl_pic_page)
jpl_pic_page_html = browser.html
jpl_pic_soup = bs(jpl_pic_page_html, 'lxml')

#the first anchor inside the 'lede' figure holds the image link
lede_figure = jpl_pic_soup.find('figure', class_='lede')
lede_link = lede_figure.find('a')
jpl_pic_url = jpl_base + lede_link['href']
print(jpl_pic_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19980_hires.jpg


In [5]:
#scrape the Mars Weather Twitter page
#set the url to be scraped
weather_url = "https://twitter.com/marswxreport?lang=en"

#call the page
#a timeout keeps the cell from hanging forever on a stalled connection, and
#raise_for_status stops an error page from being parsed as if it were tweets
weather_response = requests.get(weather_url, timeout=30)
weather_response.raise_for_status()
weather_soup = bs(weather_response.text, 'lxml')

#retrieve the first tweet for latest weather update
tweet_container = weather_soup.find('div', class_='js-tweet-text-container')
tweet_paragraph = tweet_container.find('p') if tweet_container is not None else None

#fail with a clear message instead of an AttributeError on None
if tweet_paragraph is None:
    raise RuntimeError('no tweet text found on ' + weather_url)

mars_weather = tweet_paragraph.text
print(mars_weather)

Well done! That 30 minutes of EDL dust settling was very effective. Shame #InSight can’t act as a supercharger for @marsrovers Oppy, she sure could use a boost right now.https://twitter.com/NASAInSight/status/1068661716756516864 …


In [6]:
#scrape Mars Facts
#set the url to be scraped
facts_url = 'http://space-facts.com/mars/'

#labels given to the two columns of the facts table
fact_columns = ['Feature', 'Stat']

#read the tables found on the page
facts_table = pd.read_html(facts_url)

#keep the first table as the DataFrame and relabel its columns
facts_df = facts_table[0]
facts_df.columns = fact_columns
facts_df

Unnamed: 0,Feature,Stat
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [7]:
#convert the dataframe to html
#render the table markup, then remove every newline from it
html_facts = facts_df.to_html().replace('\n', '')
html_facts

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Feature</th>      <th>Stat</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers<

In [8]:
#scrape USGS photos of hemispheres
#set the url to be scraped
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#create beautiful soup element
#a timeout keeps the cell from hanging forever on a stalled connection, and
#raise_for_status stops an error page from being parsed as search results
usgs_response = requests.get(usgs_url, timeout=30)
usgs_response.raise_for_status()
usgs_soup = bs(usgs_response.text, 'lxml')

#initiate an empty list to store dictionaries of urls and titles
hemispheres = []

#scrape all hemispheres and store in dictionaries
hemisphere_scrape = usgs_soup.find_all('div', class_='item')

#the loop variable has its own name so it is not overwritten by the
#result dictionary built at the end of each pass
for hemisphere_item in hemisphere_scrape:
    #find the link to the hemisphere's page
    h_page = hemisphere_item.find('a', class_='itemLink product-item')['href']
    hemisphere_url = 'https://astrogeology.usgs.gov' + h_page
    
    #create a beautiful soup object of the page
    hemisphere_response = requests.get(hemisphere_url, timeout=30)
    hemisphere_response.raise_for_status()
    hemisphere_soup = bs(hemisphere_response.text, 'lxml')
    
    #find the image url (first link inside the 'downloads' div)
    downloads = hemisphere_soup.find('div', class_='downloads')
    download_link = downloads.find('a') if downloads is not None else None
    if download_link is None:
        raise RuntimeError('no download link found on ' + hemisphere_url)
    hemisphere_pic_url = download_link['href']
    
    #find the hemisphere name
    hemisphere_name = hemisphere_soup.find('h2').text.replace(' Enhanced', '')
    
    #store the image url and hemisphere name in the hemisphere list
    hemisphere = {'title': hemisphere_name, 'img_url': hemisphere_pic_url}
    hemispheres.append(hemisphere)

print(hemispheres)

[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
