In [1]:
import warnings
# Ignore every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides deprecation warnings raised by the scraping libraries.
warnings.filterwarnings('ignore')

In [2]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import time
import requests
import json

## NASA Mars News

In [3]:
# Tell splinter which chromedriver binary to use, then open a visible
# (non-headless) Chrome window that the following cells drive.
executable_path = dict(executable_path="chromedriver.exe")
browser = Browser("chrome", headless=False, **executable_path)

In [4]:
# Visit the NASA Mars news site
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Wait (up to 10 s) until the article list contains at least one title instead
# of sleeping for a fixed interval, which is either too short or wasted time.
browser.is_element_present_by_css("ul.item_list div.content_title", wait_time=10)

# Create a Beautiful Soup object from the rendered page
html = browser.html
news_soup = bs(html, "html.parser")

# Get the list of articles
news_list = news_soup.find('ul', class_='item_list')

# Fail here with a clear message rather than with an AttributeError in a later cell
if news_list is None:
    raise RuntimeError("Could not find the article list (ul.item_list) on " + url)

In [5]:
# Get the title of the latest news (the first title in the article list),
# without the whitespace that surrounds the text in the page markup
latest_article = news_list.find("div", class_ = "content_title").text.strip()

# Get the paragraph text of the latest news article, likewise stripped
article_teaser = news_list.find("div", class_ = "article_teaser_body").text.strip()

print(f'The title of the latest news article is "{latest_article}".')
print(f"Here is a teaser to the latest news article: {article_teaser}")

The title of the latest news article is "Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover".
Here is a teaser to the latest news article: NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries. 


## JPL Mars Space Images - Featured Image

In [6]:
# Visit the JPL website
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

# Fixed one-second pause before interacting with the page
time.sleep(1)

# Using splinter to click on the full image button of the featured image
browser.click_link_by_partial_text('FULL IMAGE')

time.sleep(1)

# Using splinter to click the link whose text contains 'more info'
browser.click_link_by_partial_text('more info')

# Pause once more after the click
time.sleep(1)

In [7]:
# Parse whatever page the browser is currently showing
jpl_html = browser.html
jpl_soup = bs(jpl_html, "html.parser")

# src attribute of the first <img> carrying the "main_image" class
main_image_tag = jpl_soup.find("img", attrs={"class": "main_image"})
jpl_featured_image = main_image_tag["src"]

# Prefix the site root to build the complete link
featured_image_url = f"https://www.jpl.nasa.gov{jpl_featured_image}"
print(f"The url path to the featured image is {featured_image_url}")

The url path to the featured image is https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA07137_hires.jpg


## Mars Weather

In [8]:
# Visit the Mars Weather twitter account and scrape the latest weather tweet
mars_weather_url = "https://twitter.com/marswxreport?lang=en"

# Fetch the timeline with requests. The timeout keeps the cell from hanging
# indefinitely and raise_for_status() surfaces HTTP errors immediately.
response = requests.get(mars_weather_url, timeout=30)
response.raise_for_status()

# Scraping the response content into Soup
# NOTE(review): this relies on the page being served as static HTML that
# contains div.tweet elements -- confirm that this still holds.
mars_weather_soup = bs(response.content, 'lxml')
all_tweets = mars_weather_soup.find_all('div', {'class': 'tweet'})

# Creating an empty tweet list
tweet_list = []

# Looping through all the tweets and appending only those tweeted by Mars Weather user
if all_tweets:
    for tweet in all_tweets:
        content = tweet.find('div', {'class': 'content'})
        header = content.find('div', {'class': 'stream-item-header'}) if content else None
        user_link = header.find('a', {'class': 'account-group js-account-group js-action-profile js-user-profile-link js-nav'}) if header else None
        message_box = content.find('div', {'class': 'js-tweet-text-container'}) if content else None

        # Skip entries that do not have the expected tweet structure instead of
        # crashing with an AttributeError on None
        if user_link is None or message_box is None:
            continue

        user = user_link.text.replace("\n", " ").strip()
        message = message_box.text.replace("\n", " ").strip()

        if user == "Mars Weather\u200f\xa0@MarsWxReport":
            tweet_list.append(message)
else:
    print("List is empty/account name not found.")

# Select the first (presumably newest) tweet that is an actual weather report,
# i.e. mentions both "sol" and "pressure". A fixed list position breaks as soon
# as other tweets are interleaved or fewer tweets than expected are scraped.
mars_weather = next(
    (text for text in tweet_list if "sol" in text and "pressure" in text),
    None,
)
if mars_weather is None:
    raise ValueError("No Mars weather report found among the scraped tweets.")

In [9]:
# Print the tweet text stored in mars_weather
print(mars_weather)

InSight sol 456 (2020-03-08) low -94.6ºC (-138.3ºF) high -9.6ºC (14.7ºF) winds from the SSE at 5.8 m/s (12.9 mph) gusting to 20.2 m/s (45.2 mph) pressure at 6.30 hPa


## Mars Facts

In [10]:
# Page whose HTML tables hold the Mars facts
mars_facts_url = "https://space-facts.com/mars/"

# Let pandas fetch the page and return the tables it finds as a list of DataFrames
tables = pd.read_html(io=mars_facts_url)
tables

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [11]:
# Selecting the second parsed table and putting it into its own dataframe.
# Work on a copy so that renaming the columns does not silently modify the
# frame stored in `tables`, whose displayed output would otherwise become stale.
mars_facts_df = tables[1].copy()
mars_facts_df.columns = ["Planetary Attributes", "Values"]
mars_facts_df

Unnamed: 0,Planetary Attributes,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# Convert data in table to an HTML string. to_html() returns the markup only
# when no output path/buffer is passed; given a path it writes the file and
# returns None, which would leave mars_facts_html set to None.
mars_facts_html = mars_facts_df.to_html(index=False, header=False)

# Still save the same markup to disk. UTF-8 is set explicitly so the file does
# not depend on the platform's default encoding.
with open("mars_facts.html", "w", encoding="utf-8") as html_file:
    html_file.write(mars_facts_html)

## Mars Hemispheres

In [13]:
# Visit USGS website
usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(usgs_url)

time.sleep(1)

# Scrape page into Soup
usgs_html = browser.html
usgs_soup = bs(usgs_html, "html.parser")

item = usgs_soup.find_all("div", class_ = "item")

# Create a list of the titles
names = usgs_soup.find_all('h3')

# Create an empty list for the image links
hemisphere_image_urls = []

# Word removed from the end of every heading to obtain the hemisphere name
title_suffix = "Enhanced"

# Loop through all the items on the page to get the links and names
for i in range(len(item)):

    hemisphere_dict = {}

    # Remove the trailing "Enhanced" as a whole suffix. str.strip('Enhanced')
    # would instead strip any of the characters E/n/h/a/c/e/d from BOTH ends and
    # could eat into titles that start or end with one of those letters.
    title = names[i].text.strip()
    if title.endswith(title_suffix):
        title = title[:-len(title_suffix)].strip()
    hemisphere_dict["title"] = title

    # Go back to the result list and click on the item to open its detail page
    browser.visit(usgs_url)
    time.sleep(1)
    browser.click_link_by_partial_text(title)
    time.sleep(1)

    # Parse the detail page and take the href of the first link in the
    # "downloads" section as the image url
    hemisphere_html = browser.html
    hemisphere_soup = bs(hemisphere_html, "html.parser")
    hemisphere_link = hemisphere_soup.find("div", class_ = "downloads").find('ul').find('li').find('a')['href']
    hemisphere_dict["img_url"] = hemisphere_link

    hemisphere_image_urls.append(hemisphere_dict)

In [14]:
# Display the list of dictionaries holding each hemisphere's title and image url
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]