In [1]:
# Dependencies
import re
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Open a visible (non-headless) Chrome window through the chromedriver binary;
# every scraping cell below drives this single "browser" object
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

In [3]:
# Landing page of the NASA Mars News site
news_url = 'https://mars.nasa.gov/news'

# Point the browser at the news page
browser.visit(news_url)

In [4]:
# The news list is rendered by javascript, so wait for it BEFORE taking the HTML snapshot.
# (Sleeping after BeautifulSoup has already parsed "browser.html" cannot change "soup".)
# Poll for the first 'li' (class="slide") for up to 10 seconds instead of a fixed sleep.
if not browser.is_element_present_by_css('li.slide', wait_time=10):
    raise RuntimeError('NASA Mars News list did not load within 10 seconds')

# Parse HTML with BeautifulSoup
soup = BeautifulSoup(browser.html, 'html.parser')

In [5]:
# Narrow down to the first 'li' (class="slide"), then to its 'div' (class="list_text"),
# which holds the title, teaser paragraph and date of the first news item on the page
latest_slide = soup.find('li', class_='slide')
news_tag = latest_slide.find('div', class_='list_text')

# Pull the three text fields out of the news tag
news_title = news_tag.find('div', class_='content_title').get_text()
news_p = news_tag.find('div', class_='article_teaser_body').get_text()
news_date = news_tag.find('div', class_='list_date').get_text()

# Print results framed by two divider lines
divider = '-' * 97
report_lines = [
    divider,
    'News Title:',
    news_title,
    '\nNews Paragraph:',
    news_p,
    f'\nUpdated on {news_date}',
    divider,
]
print('\n'.join(report_lines))

-------------------------------------------------------------------------------------------------
News Title:
For InSight, Dust Cleanings Will Yield New Science

News Paragraph:
Wind can be crucial to clearing dust from spacecraft solar panels on Mars. With InSight's meteorological sensors, scientists get their first measurements of wind and dust interacting "live" on the Martian surface.  

Updated on May  6, 2019
-------------------------------------------------------------------------------------------------


### JPL Mars Space Images - Featured Image

In [6]:
# JPL space images page, filtered to the Mars category
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Point the browser at the image page
browser.visit(image_url)

In [7]:
# Snapshot the HTML currently loaded in the browser and parse it with BeautifulSoup
page_html = browser.html
soup = BeautifulSoup(page_html, 'html.parser')

In [8]:
# The featured image is referenced from the inline "style" attribute of the first
# 'article' (class="carousel_item"); keep the whole attribute string in "rel_path"
carousel_item = soup.find('article', class_='carousel_item')
rel_path = carousel_item.attrs['style']

# View "rel_path"
rel_path

"background-image: url('/spaceimages/images/wallpaper/PIA14884-1920x1200.jpg');"

In [9]:
# "rel_path" holds a CSS declaration such as "background-image: url('/path/to/img.jpg');".
# Extract whatever sits inside url(...) (quotes optional) instead of slicing at fixed
# offsets, which only works for one exact prefix length.
url_match = re.search(r"url\(\s*['\"]?(.*?)['\"]?\s*\)", rel_path)
if url_match is None:
    raise ValueError(f'No url(...) found in style attribute: {rel_path!r}')

# Resolve the extracted path against the page url.
# (str.rstrip() removes a SET of characters, not a suffix, so it is not a safe way
# to drop the query string from "image_url".)
featured_img_url = urljoin(image_url, url_match.group(1))

# Display "featured_img_url"
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA14884-1920x1200.jpg'

### Mars Weather

In [10]:
# Url for Mars Weather on Twitter
# Stored under its own name so the NASA news url kept in "news_url" is not overwritten
weather_url = 'https://twitter.com/marswxreport?lang=en'

# Navigate to url
browser.visit(weather_url)

In [11]:
# Snapshot the HTML currently loaded in the browser and parse it with BeautifulSoup
page_html = browser.html
soup = BeautifulSoup(page_html, 'html.parser')

In [12]:
# Find tags that may contain a Mars temperature tweet
# Note that not every 'div' tag with the class of "content" is such a tweet
content_tags = soup.find_all('div', class_='content')

# Reset on every run so a value left over from an earlier execution is never reported
mars_weather = None

# Loop through "content" tags
for content in content_tags:

    # Skip "content" tags without a 'div' (class="stream-item-header") or without a link in it
    header = content.find('div', class_='stream-item-header')
    if header is None or header.a is None:
        continue

    # Look for full name and username in stream header
    name_tag = header.a.find('span', class_='FullNameGroup')
    user_tag = header.a.find('span', class_='username')
    if name_tag is None or user_tag is None:
        continue

    # Drop the first character and any trailing right-to-left marks (U+200F) /
    # non-breaking spaces (U+00A0) that surround the displayed name
    full_name = name_tag.text[1:].rstrip('\u200f\xa0')
    username = user_tag.text

    # Mars temperature is tweeted by "MarsWxReport" as "Mars Weather"
    if full_name == 'Mars Weather' and username == '@MarsWxReport':

        # Retrieve content of the tweet for Mars temperature
        text_box = content.find('div', class_='js-tweet-text-container')
        if text_box is None or text_box.p is None:
            continue

        # The last 26 characters are cut off
        # NOTE(review): presumably a trailing "pic.twitter.com/..." link - confirm
        mars_weather = text_box.p.text[:-26]

        # Jump out of iteration once the latest Mars temperature is found
        break

# Fail with a clear message instead of a NameError when no matching tweet is on the page
if mars_weather is None:
    raise RuntimeError('No tweet by @MarsWxReport ("Mars Weather") was found on the page')

# Print results
print('-'*80)
print('Latest Mars Weather:')
print(mars_weather)
print('-'*80)

--------------------------------------------------------------------------------
Latest Mars Weather:
InSight sol 158 (2019-05-07) low -99.7ºC (-147.5ºF) high -21.8ºC (-7.2ºF)
winds from the SSE at 4.8 m/s (10.7 mph) gusting to 13.6 m/s (30.4 mph)
pressure at 7.50 hPa
--------------------------------------------------------------------------------


### Mars Facts

In [13]:
# Mars page of the Space Facts website
fact_url = 'https://space-facts.com/mars/'

# Point the browser at the facts page
browser.visit(fact_url)

In [14]:
# Snapshot the HTML currently loaded in the browser and parse it with BeautifulSoup
page_html = browser.html
soup = BeautifulSoup(page_html, 'html.parser')

In [15]:
# Accumulates one [parameter, value] pair per table row
cols = []

# Locate the first 'table' with the class of "tablepress" and the rows of its body
table = soup.find('table', class_='tablepress')
rows = table.tbody.find_all('tr')

# Keep the stripped text of the first two 'td' cells of every row
for row in rows:
    cells = row.find_all('td')
    cols.append([cells[0].text.strip(), cells[1].text.strip()])

In [16]:
# Build the DataFrame in one step from the [parameter, value] pairs held in "cols"
# (each inner list becomes one row) rather than filling an empty frame column by column
mars_fact_df = pd.DataFrame(cols, columns=['Parameters', 'Values'])

# Display "mars_fact_df"
mars_fact_df

Unnamed: 0,Parameters,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [17]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
hemisph_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Point the browser at the search results
browser.visit(hemisph_url)

In [18]:
# Snapshot the HTML currently loaded in the browser and parse it with BeautifulSoup
page_html = browser.html
soup = BeautifulSoup(page_html, 'html.parser')

In [19]:
# List to store one dict per hemisphere, holding its title and image url string
hemisph_img_urls = []

# Find all 'div' tags with the class of "item" (one per search result)
item_tags = soup.find_all('div', class_='item')

# Loop through "item" tags
for item in item_tags:

    # Retrieve hemisphere title
    title = item.h3.text

    # Resolve the detail-page link against the search url
    # (urljoin replaces slicing the site root off "hemisph_url" at a fixed offset)
    child_url = urljoin(hemisph_url, item.find('a', class_='itemLink')['href'])

    # Navigate to the child url
    browser.visit(child_url)

    # Wait (up to 10 seconds) for the image to be in the page BEFORE taking the HTML
    # snapshot; waiting after the parse would have no effect on the parsed document
    if not browser.is_element_present_by_css('img.wide-image', wait_time=10):
        raise RuntimeError(f'Image with class "wide-image" not found for {title!r}')

    # Parse the child page under its own name so the search-results "soup" is left intact
    child_soup = BeautifulSoup(browser.html, 'html.parser')

    # Resolve the image "src" against the page it was found on
    hi_r_img = urljoin(child_url, child_soup.find('img', class_='wide-image')['src'])

    # Append hemisphere title and image url string to "hemisph_img_urls" as dict
    hemisph_img_urls.append({'title': title, 'img_url': hi_r_img})

# View "hemisph_img_urls"
hemisph_img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [20]:
# Quit the browser session created at the start of the notebook
browser.quit()