In [1]:
# Dependencies
import os
from bs4 import BeautifulSoup
import requests
import pandas as pd
from splinter import Browser
import time

## NASA Mars News

In [2]:
#sets the chromedriver path and opens the chromedriver browser
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [3]:
url_mn = 'https://mars.nasa.gov/news'

In [4]:
browser.visit(url_mn)

In [5]:
#allows a delay to let the page load fully before getting the HTML Object
time.sleep(25)

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [6]:
#print(soup.prettify())

In [7]:
#returns the relevant div
results = soup.find('div', class_='image_and_description_container')

In [8]:
results

<div class="image_and_description_container"><a href="/news/8798/mars-is-getting-a-new-robotic-meteorologist/" target="_self"><div class="rollover_description"><div class="rollover_description_inner">Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles.</div><div class="overlay_arrow"><img alt="More" src="/assets/overlay-arrow.png"/></div></div><div class="list_image"><img alt="Sky-facing Camera on Perseverance Rover" src="/system/news_items/list_view_images/8798_PIA22109-320.jpg"/></div><div class="bottom_gradient"><div><h3>Mars Is Getting a New Robotic Meteorologist</h3></div></div></a><div class="list_text"><div class="list_date">November 13, 2020</div><div class="content_title"><a href="/news/8798/mars-is-getting-a-new-robotic-meteorologist/" target="_self">Mars Is Getting a New Robotic Meteorologist</a></div><div class="article_teaser_body">Sensors on NASA's Perseverance will help prepare for futur

In [9]:
#finds the text of the news preview
news_p = results.find("div", class_="rollover_description_inner").text
news_p

"Sensors on NASA's Perseverance will help prepare for future human exploration by taking weather measurements and studying dust particles."

In [10]:
#finds the text of the title
news_title = results.find("h3").text
news_title

'Mars Is Getting a New Robotic Meteorologist'

## JPL Mars Space Images - Feature Image

In [11]:
#sets the chromedriver path and opens the chromedriver browser
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [12]:
url_jpl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [13]:
browser.visit(url_jpl)

In [14]:
#click the full image button
browser.click_link_by_partial_text('FULL IMAGE')



In [15]:
#click the more info button to get to the page with the high res image
browser.click_link_by_partial_text('more info')

In [16]:
#access the HTML with BeautifulSoup to store the high res image to the variable
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
image_highres_url = soup.find("img", class_="main_image")["src"]
image_highres_url

'/spaceimages/images/largesize/PIA16469_hires.jpg'

In [17]:
#combine the browser link with the image link to get a clickable link to the image and display the link
featured_image_url = f"https://www.jpl.nasa.gov{image_highres_url}"
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16469_hires.jpg'

## Mars Facts

In [18]:
#sets the chromedriver path and opens the chromedriver browser
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [19]:
#sets the url
url_mf = "https://space-facts.com/mars/"

In [20]:
#finds the different tables on the website
tables = pd.read_html(url_mf)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [21]:
#save the first table to df as this is the one we are after
df = tables[0]

#rename columns and set index to Description
df = df.rename(columns = {0: "Description", 1: "Mars"})
df.set_index("Description", inplace = True)

df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [22]:
#save dataframe as html and set the class and border values
df.to_html('table.html', classes = 'table table-striped table-hover', border = 0)

## Mars Hemispheres

In [23]:
#sets the chromedriver path and opens the chromedriver browser
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [24]:
#list of hemisphere names for the browser to search through
mars_hemispheres = ["Cerberus", "Schiaparelli", "Syrtis Major", "Marineris"]

In [25]:
#create the list that will store the dictionaries
mars_dict = []

#url and opens the chromedriver browser
url_mh = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_mh)

#for loop to search through the url
for hem in mars_hemispheres:
    
    #lick the link with the name in the mars_hemispheres list
    browser.click_link_by_partial_text(hem)
    
    # HTML object and parse HTML with Beautiful Soup
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    #retreive the title element, drop the word "Enhanced" and the space before it and then store into dictionary 
    title = soup.find("h2", class_="title").text
    t = title.replace(" Enhanced", "")
    mars_dict.append({"title": t})
    
    #retreive the image url element, combine it with the url link and store it into dictionary
    image_link = soup.find("img", class_="wide-image")["src"]
    url_image_link = f"https://astrogeology.usgs.gov{image_link}"
    mars_dict.append({"img_url": url_image_link})
    
    #goes back to the home page to start the search for the next hemisphere in the list
    browser.visit(url_mh)



In [26]:
mars_dict 

[{'title': 'Cerberus Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]