In [1]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import os
import time

In [2]:
def get_fullname(filename):
    """Resolve ``filename`` to a path that can actually be opened.

    The name is returned untouched when it already points at a file.
    Otherwise the directory tree below the current working directory is
    searched, followed by the tree below each ancestor directory in turn
    (at most 50 levels, stopping once the filesystem root has been tried).
    The first directory reported by ``os.walk`` that contains the file wins.

    Returns:
        The joined path of the match. If nothing matches, a message is
        printed and ``filename`` is returned unchanged.
    """
    # Nothing to do when the name is already usable as given.
    if os.path.isfile(filename):
        return filename

    search_root = os.getcwd()
    for _ in range(50):
        # Lazily scan this tree and stop at the first folder holding the file.
        hit_dir = next(
            (folder for folder, _subdirs, names in os.walk(search_root) if filename in names),
            None,
        )
        if hit_dir is not None:
            return os.path.join(hit_dir, filename)

        # Step one level up; dirname() of the root is the root itself,
        # which is the signal that there is nowhere left to go.
        parent_dir = os.path.dirname(search_root)
        if parent_dir == search_root:
            break
        search_root = parent_dir

    # Not found anywhere: report it and hand the name back as-is.
    print(f"file {filename} not found !!!")
    return filename

In [3]:
def get_img_url(img_soup):
    """Return the ``src`` of the full-size image on a parsed detail page.

    Elements with class "container" are examined in document order; the
    first one that contains an element with class "wide-image" supplies the
    result (that element's ``src`` attribute as returned by ``.get``).
    An empty string is returned when no container holds such an element.
    """
    # Lazy: containers after the first hit are never inspected.
    candidates = (
        box.find(class_='wide-image')
        for box in img_soup.find_all(class_='container')
    )
    first_hit = next((node for node in candidates if node), None)
    return first_hit.get('src') if first_hit else ""

In [4]:
def get_title(img_soup):
    """Return the page title found under class="content" / class="title".

    Elements with class "content" are examined in document order and the
    text of the first non-empty class "title" element found inside one of
    them is returned. An empty string is returned when no such title exists.

    Earlier versions stopped unconditionally after the first "content"
    element, so a title located in a later element was never found; the
    search now continues until a title is obtained, matching how
    get_img_url scans its containers.
    """
    for content in img_soup.find_all(class_='content'):
        heading = content.find(class_='title')
        if not heading:
            # this content element has no title; try the next one
            continue
        text = heading.get_text()
        if text:
            return text
    return ""

In [5]:
# Start a Chrome session through splinter, pointing it at the ChromeDriver
# binary located by get_fullname(). Where the driver is installed
# system-wide, the path would instead be e.g. '/usr/local/bin/chromedriver'.
executable_path = dict(executable_path=get_fullname("chromedriver.exe"))
browser = Browser('chrome', **executable_path)
# Short pause after launching the browser before it is first used.
time.sleep(1)

In [7]:
# Visit the USGS Astrogeology search results for the enhanced Mars hemispheres.
usgs_base_url = 'https://astrogeology.usgs.gov'
usgs_url = f'{usgs_base_url}/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)
# Wait (up to 10 s) for the results container, then let the page settle briefly.
browser.is_element_present_by_id("product-section", wait_time=10)
time.sleep(0.1)

# Collect the distinct link targets inside the results section, in page order.
# An href can appear more than once in the section, so duplicates are dropped
# (dict.fromkeys keeps the first occurrence and preserves order).
prod_soup = BeautifulSoup(browser.html, 'html.parser').find(id='product-section')
thumbs_hrefs = list(dict.fromkeys(a['href'] for a in prod_soup.find_all('a', href=True)))

# Open each product page and record its title and full-size image URL.
img_urls = []
for href in thumbs_hrefs:
    # The previous iteration navigated away, so reload the results page first.
    browser.visit(usgs_url)
    browser.is_element_present_by_id("product-section", wait_time=10)
    time.sleep(0.1)
    # Click the second link that carries this href.
    # NOTE(review): presumably the first match is the thumbnail and the second
    # the caption link — confirm against the live page markup.
    browser.links.find_by_href(href)[1].click()

    # "wide-image" is a CSS class (that is how get_img_url looks it up), not an
    # element id, so wait on a class selector; an id lookup would not match it
    # and would simply run out the timeout on every page. Waits up to 10 s.
    browser.is_element_present_by_css(".wide-image", wait_time=10)
    img_soup = BeautifulSoup(browser.html, 'html.parser')
    title = get_title(img_soup)
    img_url = get_img_url(img_soup)
    # Keep only complete records; the image src is appended to the site base URL.
    if (title and img_url):
        img_urls.append({'img_url' : usgs_base_url + img_url, 'title' : title})

In [8]:
# Show the collected records: one dict per hemisphere with 'img_url' and 'title' keys.
img_urls

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

# Image URLs only, derived from img_urls. The bare name `titles` that used to
# be here is not defined anywhere in this notebook, so the cell failed on a
# fresh kernel; this expression reproduces the output recorded below.
[record['img_url'] for record in img_urls]

['https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg']