In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup

import pandas as pd

In [2]:
# Tell Splinter which driver executable to use ('chromedriver'), then launch Chrome
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', **executable_path)

In [63]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere images
url_hemi = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)
browser.visit(url_hemi)
# Give the results container up to one second to show up before scraping;
# the call's boolean result is left as the cell's displayed value.
browser.is_element_present_by_css("div.collapsible.results", wait_time=1)

True

In [64]:
# Snapshot the browser's current page source and parse it with BeautifulSoup
html = browser.html
img_soup = BeautifulSoup(html, features='html.parser')
# Select the first <div> that carries both the "collapsible" and "results" classes
elem = img_soup.select_one('div.collapsible.results')

In [65]:
# Accumulator for the per-hemisphere records (starts out empty)
hemi_list = list()

In [80]:
# Gather the detail-page href of every hemisphere entry in the results container:
# each <div class="description"> is searched for its <a class="itemLink product-item">.
all_hemis = elem.find_all(name='div', class_='description')
all_img_links = [
    hemi.select_one('a.itemLink.product-item').get("href")
    for hemi in all_hemis
]
all_img_links

['/search/map/Mars/Viking/cerberus_enhanced',
 '/search/map/Mars/Viking/schiaparelli_enhanced',
 '/search/map/Mars/Viking/syrtis_major_enhanced',
 '/search/map/Mars/Viking/valles_marineris_enhanced']

In [98]:
# Visit every hemisphere detail page and record its full-size image URL and title.
# Links in all_img_links are site-relative, so they are joined onto this base.
base_url = 'https://astrogeology.usgs.gov'

# Start from an empty list each time this cell runs; otherwise re-executing
# the cell would append a second copy of every hemisphere record.
hemi_list = []

for hemi_path in all_img_links:
    link = f'{base_url}{hemi_path}'

    # visit the page:
    browser.visit(link)
    # Wait (up to 1 second) for the image wrapper to be PRESENT before reading
    # the HTML. The "not_present" variant waits for the element to be absent,
    # which returns immediately while the page is still loading.
    browser.is_element_present_by_css("div.wide-image-wrapper", wait_time=1)

    # Parse the resulting html with soup
    hemi_html = browser.html
    hemi_html_soup = BeautifulSoup(hemi_html, 'html.parser')

    # Find the relative image url
    # use 2 tags: <div /> class=wide-image-wrapper --> <img /> class=wide-image
    img_url_rel = hemi_html_soup.select_one('div.wide-image-wrapper img.wide-image').get("src")
    # Use the base URL to create an absolute URL
    img_url = f'{base_url}{img_url_rel}'

    # Find the image title
    # use 2 tags: <div /> class=content --> <h2 /> class=title
    img_title = hemi_html_soup.select_one('div.content h2.title').text

    # Store the result in a dict, then append it to the list:
    hemi_dict = {
        "img_url": img_url,
        "title": img_title
    }
    hemi_list.append(hemi_dict)

    print(hemi_dict)

{'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg', 'title': 'Cerberus Hemisphere Enhanced'}
{'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg', 'title': 'Schiaparelli Hemisphere Enhanced'}
{'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg', 'title': 'Syrtis Major Hemisphere Enhanced'}
{'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg', 'title': 'Valles Marineris Hemisphere Enhanced'}


In [99]:
# Display the collected hemisphere records (one dict per hemisphere with "img_url" and "title")
hemi_list

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]