In [1]:
# Import dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Setup Splinter
executable_path = {"executable_path": ChromeDriverManager().install()}
browser = Browser("chrome", **executable_path, headless = False)

# URL of page to be scraped
url = "https://redplanetscience.com/"
browser.visit(url)

In [3]:
# Confirm the HTML element to isolate the title and preview is present within the browser.
browser.is_element_present_by_css("div.list_text", 1)

True

In [4]:
# Create variables for browser.html to use as a Beautiful Soup object and parse out the webpage
mars = browser.html
mars_soup = bs(mars, "html.parser")

In [5]:
# Use the soup object to find the results for the HTML element
mars_results = mars_soup.find_all("div", class_="list_text")

In [6]:
# Create an empty list to append the individual dictionaries to.
mars_dict_list = []

In [7]:
# Loop through the mars_results and find the title and preview, convert to text, create a dictionary for the
# result and append the dictionary to the empty list.
for result in mars_results:
    title = result.find("div", class_="content_title").text
    preview = result.find("div", class_="article_teaser_body").text
    mars_dict = {"title": title, "preview": preview}
    mars_dict_list.append(mars_dict)
    print(f'''Article Title: {title}
Summary: {preview}
----------------------------------------''')

Article Title: NASA's Mars 2020 Rover Completes Its First Drive
Summary: In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.
----------------------------------------
Article Title: NASA's InSight Flexes Its Arm While Its 'Mole' Hits Pause
Summary: Now that the lander's robotic arm has helped the mole get underground, it will resume science activities that have been on hold.
----------------------------------------
Article Title: How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 
Summary: Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.
----------------------------------------
Article Title: NASA's Mars Rover Drivers Need Your Help
Summary: Using an online tool to label Martian terrain types, you can train an artificial intelligence algorithm that could improve the wa

In [8]:
# URL of image page to be scraped
url_2 = "https://spaceimages-mars.com/"
browser.visit(url_2)

In [9]:
# Confirm the HTML element to isolate the image source is present within the browser.
browser.is_element_present_by_css("img.headerimage", 1)

True

In [10]:
# Create variables for browser.html to use as a Beautiful Soup object and parse out the webpage
mars_img = browser.html
mars_img_soup = bs(mars_img, "html.parser")

In [11]:
# Use the soup object to find the results for the HTML element
mars_img_results = mars_img_soup.find("img", class_="headerimage")

# Create a variable for the image source and from the results isolate the specific attribute
img_source = mars_img_results["src"]
img_source

'image/featured/mars3.jpg'

In [12]:
# Using the url for the spaceimages-mars.com, combine it with the source to create a complete URL for the image
featured_image_url = url_2 + img_source
featured_image_url

'https://spaceimages-mars.com/image/featured/mars3.jpg'

In [13]:
# URL of Mars facts to be scraped
url_3 = "https://galaxyfacts-mars.com/"
browser.visit(url_3)

In [14]:
# Using Pandas, read URL to obtain the info on the tables
tables = pd.read_html(url_3)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [15]:
# Isolate the table with info specific to Mars
mars_profile_df = tables[1]
mars_profile_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
# Convert the DataFrame to an HTML table
mars_html_table = mars_profile_df.to_html()
mars_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n   

In [17]:
# URL of Mars facts to be scraped
url_4 = "https://marshemispheres.com/"

# Create list of all hemisphere links to loop through
hemispheres = ["cerberus.html", "schiaparelli.html", "syrtis.html", "valles.html"]

# Create an empty list to store the hemisphere image URLs
hemisphere_image_urls = []

In [18]:
# Loop through the hemispheres to find the title and img_url

for hemisphere in hemispheres:
    loop_url = url_4 + hemisphere
    browser.visit(loop_url)
    
    # Create variables for browser.html to use as a Beautiful Soup object and parse out the webpage
    hemi = browser.html
    hemi_soup = bs(hemi, "html.parser")
    
    # Use the soup object to find the results for the HTML element
    hemi_results = hemi_soup.find("div", class_="container")
    title = hemi_results.find("h2").text
    img = hemi_results.find("img", class_="wide-image")
    img_url = loop_url + img["src"]
    
    # Create a dictionary for the result
    hemi_dict = {"title": title, "img_url": img_url}
    
    # Append the dictionary to the empty list.
    hemisphere_image_urls.append(hemi_dict)
    
    # Print to confirm title and URL were grabbed for each hemisphere
    print(f'''Hemisphere: {title}
Image URL: {img_url}
----------------------------------------''')

Hemisphere: Cerberus Hemisphere Enhanced
Image URL: https://marshemispheres.com/cerberus.htmlimages/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
----------------------------------------
Hemisphere: Schiaparelli Hemisphere Enhanced
Image URL: https://marshemispheres.com/schiaparelli.htmlimages/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
----------------------------------------
Hemisphere: Syrtis Major Hemisphere Enhanced
Image URL: https://marshemispheres.com/syrtis.htmlimages/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
----------------------------------------
Hemisphere: Valles Marineris Hemisphere Enhanced
Image URL: https://marshemispheres.com/valles.htmlimages/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
----------------------------------------


In [19]:
# Confirm list contains each hemisphere's dictionary
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/cerberus.htmlimages/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/schiaparelli.htmlimages/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/syrtis.htmlimages/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/valles.htmlimages/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [20]:
# Quit the browser session.
browser.quit()