In [1]:
# Dependencies
import os
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Locate chromedriver: use the CHROMEDRIVER_PATH environment variable when it is set,
# otherwise fall back to /usr/local/bin/chromedriver (the previously hard-coded location).
executable_path = {
    'executable_path': os.environ.get('CHROMEDRIVER_PATH', '/usr/local/bin/chromedriver')
}
# Start a Chrome session through splinter with headless mode turned off
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [3]:
# Point the automated browser at the NASA Mars news listing
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(url=nasa_url)

In [4]:
# Grab the HTML currently held by the browser and parse it with the built-in 'html.parser'
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# Each match is a <ul class="item_list"> container holding the news entries
articles = soup.find_all('ul', class_='item_list')

# Start from a known state so a failed scrape never leaves stale values behind
news_title = None
news_p = None

# Loop through the returned lists and keep the first title/teaser pair that is present
for article in articles:
    title_tag = article.find('div', class_='content_title')
    teaser_tag = article.find('div', class_='article_teaser_body')

    # Skip a list whose entries are missing instead of failing with AttributeError on `.text`
    if title_tag is None or teaser_tag is None:
        continue

    # Retrieve the latest News Title and Paragraph Text.
    news_title = title_tag.text
    news_p = teaser_tag.text
    break

if news_title is None:
    print("No news article found - the page may not have finished loading; rebuild `soup` and re-run this cell.")
else:
    print("Latest news about Mars is:")
    print('-------------------------')
    print(news_title)
    print(news_p)

Latest news about Mars is:
-------------------------
Hear Audio From NASA's Perseverance As It Travels Through Deep Space
The first to be rigged with microphones, the agency's latest Mars rover picked up the subtle sounds of its own inner workings during interplanetary flight.


## JPL Mars Space Images - Featured Image

In [6]:
# Site root (used later to build absolute image links) and the Mars image search page
main_jpl_url = 'https://www.jpl.nasa.gov'
jpl_url = f'{main_jpl_url}/spaceimages/?search=&category=Mars'
browser.visit(url=jpl_url)

In [7]:
# Grab the HTML currently held by the browser and parse it with the built-in 'html.parser'
html = browser.html
image_soup = BeautifulSoup(markup=html, features='html.parser')

In [8]:
# Retrieve the current Featured Mars Image.
# NOTE(review): index 3 among the page's <img> tags is layout-dependent - confirm it still
# points at the featured image if the page markup changes.
image = image_soup.find_all('img')[3]['src']

# Build featured_image_url with urljoin so that a root-relative, relative or already
# absolute `src` all resolve to a valid URL (plain concatenation only handles the first case)
featured_image_url = urljoin(main_jpl_url, image)
print(f"featured_image_url = {featured_image_url}")

featured_image_url = https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA24247-640x350.jpg


## Mars Facts

In [9]:
# Page whose HTML tables hold the Mars facts
fact_url = "https://space-facts.com/mars/"

In [10]:
# Let pandas fetch the page and turn every HTML table it finds into a DataFrame
tables = pd.read_html(io=fact_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [11]:
# Take the first DataFrame from `tables` as a copy, so renaming its columns does not
# mutate the list displayed by the previous cell, then label the columns
# `['description', 'value']` and index the rows by description.
df = tables[0].copy()
df.columns = ['description', 'value']
df = df.set_index("description")
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# Generate an HTML table string from the facts DataFrame
html_table = df.to_html()

# Strip unwanted newlines to clean up the table; str.replace returns a new string,
# so the result must be assigned back for the clean-up to take effect
html_table = html_table.replace('\n', '')

# Save the table directly to a file (written by pandas itself, independent of `html_table`)
df.to_html('mars_facts_table.html')

## Mars Hemispheres

In [13]:
# Site root (prepended to per-hemisphere links later) and the hemisphere search results page
main_hemisphere_url = 'https://astrogeology.usgs.gov'
hemisphere_url = f'{main_hemisphere_url}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/'
browser.visit(url=hemisphere_url)

In [14]:
# Grab the HTML currently held by the browser and parse it with the built-in 'html.parser'
html = browser.html
hemisphere_soup = BeautifulSoup(markup=html, features='html.parser')

In [15]:
# Every search hit on the results page is a <div class="item">
results = hemisphere_soup.find_all('div', class_='item')

# Collects one {'title': ..., 'img_url': ...} dictionary per hemisphere
hemisphere_image_urls = []

for result in results:
    # The description block carries both the heading and the link to the detail page
    description = result.find('div', class_="description")
    title = description.h3.text
    link = description.a['href']

    # Open the detail page and parse it with 'html.parser'
    browser.visit(main_hemisphere_url + link)
    html = browser.html
    hemisphere_image_soup = BeautifulSoup(html, 'html.parser')

    # The image link is the anchor of the first <li> inside the "downloads" block
    new_results = hemisphere_image_soup.find('div', class_='downloads')
    img_url = new_results.find('li').a['href']

    # Store the data under the keys title and img_url
    img_dict = {'title': title, 'img_url': img_url}
    hemisphere_image_urls.append(img_dict)

In [16]:
# Show the collected title / image-url dictionaries
print("hemisphere_image_urls =\n {}".format(hemisphere_image_urls))

hemisphere_image_urls =
 [{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
