In [1]:
# Dependencies: Import Splinter and BeautifulSoup
import time
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Set the executable path and initialize the chrome browser in splinter
def init_browser():
    executable_path = {'executable_path': 'C:/Users/Doug/Desktop/chromedriver.exe'}
    return Browser('chrome', **executable_path, headless=False)
    #browser

## NASA Mars News

In [3]:
# Using python to scrape website
browser = init_browser()
mars_data = {}

# Open the Nasa Mars Webpage (must be open to code)
url = 'https://mars.nasa.gov/news'
browser.visit(url)
time.sleep(2)

# HTML object
html = browser.html
    
# Parse ('lxml') HTML with Beautiful Soup
soup = bs(html, 'html.parser')

# Retrieve the latest news title and paragraph 
# Use 0 bc retrieval is a list and starts at 0, not one
news_t = soup.find_all('div', class_='content_title')[0].text
news_p = soup.find_all('div', class_='article_teaser_body')[0].text
    
print(news_t)
print("--------------------------------------------------------------------")
print(news_p)

Mars Now
--------------------------------------------------------------------
The science team is studying the clouds, which arrived earlier and formed higher than expected, to learn more about the Red Planet. 


## JPL Mars Space Images - Featured Image

In [4]:
# Mars Image to be scraped
mars_image_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html'
browser.visit(mars_image_url)
browser.find_by_text(' FULL IMAGE').click()

In [5]:
#HTML object
image_html = browser.html

#Parse HTML with Beautiful Soup
image_soup = bs(image_html, 'html.parser')

#find first Mars image url
img_path = image_soup.find('img', class_='fancybox-image')['src']

#combine url to get image path
featured_image_url = f'https://www.jpl.nasa.gov{img_path}'

print(f'featured_image_url = {featured_image_url}')

featured_image_url = https://www.jpl.nasa.govimage/featured/mars3.jpg


### Mars Facts

In [6]:
#Visit Mars facts page and use Pandas to scrape the table
facts_url = 'https://space-facts.com/mars/'
browser.visit(facts_url)
time.sleep(1)

In [7]:
#HTML object
mars_facts = browser.html

#Parse HTML with Beautiful Soup
soup_f = bs(mars_facts, 'html.parser')

fact_table = soup_f.find('section', class_='sidebar widget-area clearfix')
column1 = fact_table.find_all('td', class_='column-1')
column2 = fact_table.find_all('td', class_='column-2')

# Empty List to hold the scraped data
descriptions = []
values = []

#  Note: row.text.strip(): Return a copy of the string with the leading and trailing characters removed
for row in column1:
    description = row.text.strip()
    descriptions.append(description)
    
for row in column2:
    value = row.text.strip()
    values.append(value)

# Convert scraped lists to a pandas DF 
mars_facts = pd.DataFrame({
    "Description":descriptions,
    "Value":values
    })

# Convert DF to html 
mars_facts_html = mars_facts.to_html(header=False, index=False)
mars_facts

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


###  Mars Hemispheres

In [8]:
# Visit the USGS Astrogeology site 
mars_hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mars_hemi_url)
time.sleep(1)

In [9]:
# Find the image url and title and title

# HTML object
hemi_html = browser.html
#Parse HTML with Beautiful Soup
soup_h = bs(hemi_html, 'html.parser')

#Retreive all items
items = soup_h.find_all('div', class_='item')

# Create an empty list 
hemi_image_urls = []

# store the main url
hemi_url = 'https://astrogeology.usgs.gov'
mars_data = {}
#loop through items 
for i, iv in enumerate(items):
    #store title
    title = iv.find('h3').text
    
    # store the link to full image from thumbnail page
    hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # link for the full image website
    browser.visit(hemi_url)
    browser.find_by_tag('h3')[i].click()
        
    # HTML object for individual hemisphere sites
    image_url = browser.html    
    
    # Parse HTML with Beautiful Soup for each hemisphere
    image_soup = bs(image_url, 'html.parser')
    
    # Full image path
    hemi_full_path = image_soup.find('img',class_='wide-image')['src']

    # retrieve full image source
    img_url = f'https://astrogeology.usgs.gov{hemi_full_path}'
    
    # append title and urls to list
    hemi_image_urls.append({"title": title,"image_url": img_url})
    mars_data['hemisphere_images'] = hemi_image_urls
    
    print(img_url)

https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg


In [10]:
browser.quit()