In [1]:
# Import dependencies: BeautifulSoup for HTML parsing, pandas for table scraping
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Import Splinter's Browser and start the Chrome session that the
# browser.visit(...) cells in this notebook share.
from splinter import Browser

# Keyword arguments forwarded to Browser(). "chromedriver" is a bare name, so it is
# presumably resolved from PATH or the working directory (the original note said
# this setup is "for Windows Users") — TODO confirm for other platforms.
executable_path = {"executable_path": "chromedriver"}
# headless=False is passed explicitly, i.e. a visible browser window is requested.
browser = Browser("chrome", **executable_path, headless=False)

### NASA Mars News

In [3]:
# Point the shared splinter browser at the NASA Mars news page;
# the next cell reads the loaded page back through browser.html.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# Extract the first news headline and its teaser paragraph from the page
# currently loaded in the browser.

# Wait (up to 10 s each) for the elements this cell needs before taking the
# HTML snapshot, in case the article list is filled in after the initial load.
# The calls return as soon as the element is present, so a ready page is not delayed.
browser.is_element_present_by_css("div.content_title a", wait_time=10)
browser.is_element_present_by_css("div.article_teaser_body", wait_time=10)

# HTML Object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Locate the first headline container (and the link inside it) and the first teaser
title_div = soup.find('div', class_='content_title')
title_link = title_div.find('a') if title_div is not None else None
teaser_div = soup.find('div', class_='article_teaser_body')

# find() returns None when nothing matches; fail with a readable message
# instead of an AttributeError raised on None.
if title_link is None or teaser_div is None:
    raise RuntimeError(
        "No news title/teaser found in the current page; "
        "the page may not have finished loading or its markup has changed."
    )

# Retrieve the text of the first news title and of the first teaser paragraph
news_title = title_link.text
news_p = teaser_div.text

# Display scraped data
print(news_title)
print(news_p)

NASA's MRO Completes 60,000 Trips Around Mars
The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


### JPL Mars Space Images - Featured Image

In [5]:
# Load the JPL space-images gallery (the URL carries category=Mars) in the shared
# browser; the next cell parses the loaded page to find the featured image.
jpl_mars_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_mars_url)

In [6]:
# Snapshot of the page currently loaded in the browser
html = browser.html

# Build a Beautiful Soup tree from the snapshot
soup = BeautifulSoup(html, 'html.parser')

# Site root; the path scraped below is appended to it
jpl_url = 'https://www.jpl.nasa.gov'

# Drill down: gallery section -> first "fancybox" anchor -> its full-size image path
section = soup.find("section", class_="grid_gallery module grid_view")
fancybox_anchor = section.find('a', class_='fancybox')
image_path = fancybox_anchor["data-fancybox-href"]

# Absolute URL of the featured image (site root + scraped path)
featured_image_url = jpl_url + image_path

# Show the resulting URL as the cell output
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23221_hires.jpg'

### Mars Weather

In [7]:
# Open the @marswxreport Twitter timeline (English UI via ?lang=en) in the shared
# browser; the next cell parses the first tweet container on the loaded page.
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

In [8]:
# Extract the text of the first tweet on the page currently loaded in the
# browser and store it as the Mars weather report.

# HTML Object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# First tweet container on the page
section = soup.find("div", class_="js-tweet-text-container")
if section is None:
    raise RuntimeError(
        "No tweet container found in the current page; "
        "the page may not have loaded or its markup has changed."
    )

# Paragraph holding the tweet text
p = section.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
if p is None:
    raise RuntimeError("Tweet container found, but it holds no tweet-text paragraph.")

# Remove the first "a" tag nested in the paragraph so its text is not part of the
# report. A tweet without any link has no "a" tag (p.a is None); nothing to strip then.
if p.a is not None:
    p.a.extract()
mars_weather = p.text

print(mars_weather)

InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)
winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)
pressure at 7.50 hPa


### Mars Facts

In [9]:
# Page that hosts the Mars facts table
facts_url = "https://space-facts.com/mars/"

# pandas' read_html returns a list of tables found on the page; keep the first
facts_table = pd.read_html(facts_url)[0]

# Label the two columns
facts_table.columns = ["description", "value"]

# Index the facts by their description
mars_facts_df = facts_table.set_index("description")

# Display the facts table
mars_facts_df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


### Mars Hemispheres

In [10]:
# Load the USGS Astrogeology search results for "hemisphere enhanced" (target: Mars)
# in the shared browser; the next cell walks the result items on the loaded page.
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

In [11]:
# For every hemisphere listed on the search-results page currently loaded in the
# browser, open its detail page and collect {"title", "img_url"} for the wide image.

# HTML Object
html_hemispheres = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html_hemispheres, "html.parser")

# Scrape the result item of every hemisphere
hemispheres = soup.find_all("div", class_="item")

# Main url; the hrefs and image sources scraped below are appended to it
main_url = "https://astrogeology.usgs.gov"

# One {"title": ..., "img_url": ...} dictionary per hemisphere
hemisphere_image_urls = []


# Loop through all the hemispheres
for hemisphere in hemispheres:

    # Scrape the title from the search-result item
    title = hemisphere.find('h3').text

    # Scrape the relative path of the hemisphere's detail page
    detail_path = hemisphere.find('a', class_='itemLink product-item')["href"]

    # Complete detail-page url
    detail_url = main_url + detail_path

    # Visit the specific hemisphere page using the splinter browser
    browser.visit(detail_url)

    # Parse the detail page into its own soup, leaving `soup` bound to the
    # search-results page for the rest of the cell
    detail_soup = BeautifulSoup(browser.html, 'html.parser')

    # Locate the wide image; find() returns None when it is missing
    wide_image = detail_soup.find('img', class_='wide-image')
    if wide_image is None:
        raise RuntimeError("No 'wide-image' element found on " + detail_url)

    # Full image url
    image_url = main_url + wide_image['src']

    # Append title and image url as dictionary
    hemisphere_image_urls.append({"title" : title, "img_url" : image_url})


print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [12]:
# All scraping is done: quit the splinter browser session created at the top
# of the notebook so it is not left running.
browser.quit()