In [25]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
import urllib.request

<h1>NASA Mars News</h1>

In [2]:
# Target page for the NASA Mars news scrape
mars_news_url = 'https://mars.nasa.gov/news/'

# Keyword arguments that point splinter at the Chromedriver binary
executable_path = dict(executable_path='chromedriver.exe')

# Start a visible (non-headless) Chrome session driven by splinter
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Point the splinter-controlled browser at the Mars news page
# (mars_news_url is defined in the setup cell above)
browser.visit(mars_news_url)

In [4]:
# Later cells search this page for <li class="slide"> items. Give the page up
# to 10 seconds to show at least one of them before snapshotting the DOM, so
# the HTML is not captured while the list is still empty. If none appears the
# call simply returns False and the snapshot is taken anyway.
# NOTE(review): assumes the news list is filled in client-side -- confirm.
browser.is_element_present_by_css('li.slide', wait_time=10)

# Snapshot the page source as the browser currently sees it
html = browser.html

# Parse the snapshot with Python's built-in HTML parser
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Pretty print soup results
#print(soup.prettify())

In [6]:
# Collect every <li class="slide"> element; each one wraps a single news item
results = soup.find_all('li', attrs={'class': 'slide'})

# To inspect the first news item, evaluate: results[0]

In [7]:
# Fail with a clear message when the page yielded no news items, instead of
# an opaque IndexError from results[0]
if not results:
    raise ValueError("No <li class='slide'> news items were found on the Mars news page")

# Pull the teaser and header tags out of the first news item
first_news_item = results[0]
teaser_tag = first_news_item.find('div', class_='article_teaser_body')
header_tag = first_news_item.find('h3')

# find() returns None for a missing tag; report that here rather than
# failing on `.text` with an AttributeError
if teaser_tag is None or header_tag is None:
    raise ValueError('First news item is missing its teaser <div> or its <h3> header')

news_paragraph = teaser_tag.text
news_header = header_tag.text

# Show the teaser paragraph followed by the header
print(news_paragraph)
print(news_header)

NASA is adding a Mars helicopter to the agency’s next mission to the Red Planet, Mars 2020.
Mars Helicopter to Fly on NASA’s Next Red Planet Rover Mission


In [8]:
# End the splinter browser session opened for the news page; the sections
# below fetch their pages with requests instead of the browser
browser.quit()

<h1>JPL Mars Space Images - Featured Image</h1>

In [9]:
# Listing page for JPL's Mars space images
jpl_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Fetch the page. requests applies no timeout by default, so one is set
# explicitly to keep a stalled connection from hanging the notebook.
response = requests.get(jpl_image_url, timeout=30)

# Stop on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()

# Parse the response body with the lxml parser
soup = BeautifulSoup(response.text, 'lxml')

In [10]:
# Pretty print soup results
#print(soup.prettify())

In [11]:
# Collect the <a class="button fancybox"> anchors; the first one carries the
# featured image path in its data-fancybox-href attribute
results = soup.find_all('a', attrs={'class': 'button fancybox'})

In [12]:
# The anchor stores a site-relative path; prefix it with the JPL host to get
# an absolute URL
featured_anchor = results[0]
featured_image_path = featured_anchor['data-fancybox-href']
featured_image_url = 'https://www.jpl.nasa.gov{}'.format(featured_image_path)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA01320_ip.jpg'

In [29]:
# Download the featured image into the working directory; the cell displays
# the (filename, headers) tuple that urlretrieve returns
urllib.request.urlretrieve(url=featured_image_url, filename='featured-image.jpg')

('featured-image.jpg', <http.client.HTTPMessage at 0x23542221908>)

<h1>Mars Weather (Twitter)</h1>

In [13]:
# Twitter account that posts the Mars weather reports
mars_twitter_url = 'https://twitter.com/marswxreport'

# Fetch the page. requests applies no timeout by default, so one is set
# explicitly to keep a stalled connection from hanging the notebook.
response = requests.get(mars_twitter_url, timeout=30)

# Stop on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()

# Parse the response body with the lxml parser
soup = BeautifulSoup(response.text, 'lxml')

In [14]:
# Pretty print soup results
#print(soup.prettify())

In [15]:
# Collect the containers that wrap each tweet's text
results = soup.find_all('div', class_='js-tweet-text-container')

# Fail with a clear message when no tweets were found, instead of an opaque
# IndexError from results[0]
if not results:
    raise ValueError("No 'js-tweet-text-container' elements were found on the Mars weather page")

# The tweet text sits in the first <p> of the first container; find()
# returns None when that tag is absent, so check before reading `.text`
first_tweet_tag = results[0].find('p')
if first_tweet_tag is None:
    raise ValueError('First tweet container has no <p> element holding the tweet text')

# Store first tweet
mars_weather = first_tweet_tag.text
mars_weather

'Sol 2047 (May 10, 2018), Sunny, high 3C/37F, low -71C/-95F, pressure at 7.33 hPa, daylight 05:22-17:20'

<h1>Mars Facts</h1>

In [16]:
# Page that hosts the Mars facts table
mars_facts_url = "https://space-facts.com/mars"

# Let pandas fetch the page and parse its HTML tables
tables = pd.read_html(io=mars_facts_url)

# To preview the first parsed table, evaluate: tables[0]

In [17]:
# Render the first parsed table as an HTML string
facts_table = tables[0]
mars_table_html = facts_table.to_html()

In [18]:
# Drop every newline by splitting on '\n' and gluing the pieces back together
mars_table_html = ''.join(mars_table_html.split('\n'))
mars_table_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    <

In [19]:
# Save table as HTML file (left disabled; the parsed table lives in tables[0])
# tables[0].to_html('table.html')

<h1>Mars Hemispheres</h1>

In [20]:
# USGS Astrogeology search page listing the enhanced Mars hemisphere images
mars_hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Fetch the page. requests applies no timeout by default, so one is set
# explicitly to keep a stalled connection from hanging the notebook.
response = requests.get(mars_hemi_url, timeout=30)

# Stop on a 4xx/5xx reply instead of silently parsing an error page
response.raise_for_status()

# Parse the response body with the lxml parser
soup = BeautifulSoup(response.text, 'lxml')

In [21]:
# Pretty print soup
#print(soup.prettify())

In [22]:
# Collect the <div class="item"> elements, one per search result
results = soup.find_all('div', attrs={'class': 'item'})

In [28]:
# One {'title': ..., 'img_url': ...} dictionary is collected per search result
hemispheres_image_urls = []

# For each search result: read its title, follow its link to the detail page,
# pull the first download link from that page, record it, and save the image
for result in results:

    # Title comes from the result's <h3>, minus the ' Enhanced' suffix
    title = result.find('h3').text
    title = title.replace(' Enhanced', '')

    # The result's first anchor holds a site-relative link to the detail page
    link = result.find('a')['href']
    full_link = 'https://astrogeology.usgs.gov' + link

    # Fetch the detail page. requests applies no timeout by default, so one is
    # set explicitly; a 4xx/5xx reply aborts instead of being parsed.
    img_response = requests.get(full_link, timeout=30)
    img_response.raise_for_status()
    img_soup = BeautifulSoup(img_response.text, 'lxml')

    # The download links live inside <div class="downloads">
    img_results = img_soup.find_all('div', class_='downloads')

    # Fail with a clear message naming the page, instead of an opaque
    # IndexError from img_results[0]
    if not img_results:
        raise ValueError("No 'downloads' section found on " + full_link)

    # Use the first anchor of the first downloads section as the image URL
    img_url = img_results[0].find_all('a')[0]['href']

    # Record title and URL together and add them to the running list
    img_dict = {'title': title, 'img_url': img_url}
    hemispheres_image_urls.append(img_dict)

    # File name: title with spaces turned into dashes, plus a .jpg extension
    img_file_name = img_dict['title'].replace(' ', '-') + '.jpg'

    # Save image into the working directory
    urllib.request.urlretrieve(img_dict['img_url'], img_file_name)

# Display the final list
hemispheres_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere'}]