In [160]:
from selenium import webdriver

from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time 

In [137]:
WEBSITE = 'https://www.audible.com/search'

In [138]:
##############################
# Step 0 - Lanch chrome driver

def launch_browser():
    options = webdriver.ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_experimental_option("detach", True)
    options.headless = False
    options.add_argument('window-size=1920x1080')
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(WEBSITE)
    
    return driver

driver = launch_browser()

In [141]:
################################################
# Define function to get info from a SINGLE book
def get_book_element(product, element):
    element_function_dict = {
        'heading' : '''product.find_element(By.XPATH, './/h3[contains(@class, "bc-heading")]/a').text''',
        'subtitle' : '''product.find_element(By.XPATH, './/li[contains(@class, "subtitle")]/span').text''',
        'author': '''product.find_element(By.XPATH, './/li[contains(@class, "authorLabel")]/span').text''',
        'narrator' : '''product.find_element(By.XPATH, './/li[contains(@class, "narratorLabel")]/span').text''',
        'series' : '''product.find_element(By.XPATH, './/li[contains(@class, "seriesLabel")]/span').text''',
        'runtime' : '''product.find_element(By.XPATH, '//li[contains(@class, "runtimeLabel")]/span').text''',
        'release_date' : '''product.find_element(By.XPATH, '//li[contains(@class, "releaseDateLabel")]/span').text''',
        'language': '''product.find_element(By.XPATH, '//li[contains(@class, "languageLabel")]/span').text''',
        'ratings' : '''product.find_element(By.XPATH, '//li[contains(@class, "ratingsLabel")]/span').text''',
    }

    try:
        return eval(element_function_dict[element]) # Turn chosen string into executable function
    except:
        return 'Not Found'

In [None]:
##############################################
# Locate the pagination bar and find last page
pagination = driver.find_element(By.XPATH, '//ul[contains(@class, "pagingElements")]')
pages_item = pagination.find_elements(By.TAG_NAME, './/li')
last_page = int(pages_item[-2].find_element(By.XPATH, './/a').text)

In [164]:
heading = []
subtitle = []
author = []
narrator = []
series = []
runtime = []
release_date = []
language = []
ratings = []

#####################
# Loop over all pages
for page in range(1, 3): #last_page+1
    # Implicit wait
    time.sleep(2) 

    ################################################################
    # Find the "information box" (sth that contains all items needed) - Explicit wait
    products_container = WebDriverWait(driver, 2).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'adbl-impression-container '))
    )
    # driver.find_element(By.CLASS_NAME, 'adbl-impression-container ')

    #####################################################
    # Get the products list within a page - Explicit wait
    products = WebDriverWait(products_container, 2).until(
        EC.presence_of_all_elements_located((By.XPATH, './div/span/ul/li'))
    )

    ##########################################
    # Scraping all products data within a page
    ##########################################
    for i, product in enumerate(products):
        heading.append(get_book_element(product, 'heading'))
        subtitle.append(get_book_element(product, 'subtitle'))
        author.append(get_book_element(product, 'author'))
        narrator.append(get_book_element(product, 'narrator'))
        series.append(get_book_element(product, 'series'))
        runtime.append(get_book_element(product, 'runtime'))
        release_date.append(get_book_element(product, 'release_date'))
        language.append(get_book_element(product, 'language'))
        ratings.append(get_book_element(product, 'ratings'))

    next_page_url = f'https://www.audible.com/search?page={page + 1}'
    print(next_page_url)
    driver.get(next_page_url)


https://www.audible.com/search?page=2
https://www.audible.com/search?page=3


In [154]:
df_books = pd.DataFrame({
    "title" : heading,
    "subtitle" : subtitle,
    "author" : author,
    "narrator" : narrator,
    "series" : series,
    "length" : runtime,
    "release_date" : release_date,
    "language" : language,
    "ratings" : ratings
})
df_books

Unnamed: 0,title,subtitle,author,narrator,series,length,release_date,language,ratings
0,One for the Ages,"DCI Logan Crime Thrillers, Book 16",By: JD Kirk,Narrated by: Angus King,"Series: DCI Logan Crime Thrillers, Book 16",Length: 11 hrs and 32 mins,Release date: 02-28-23,Language: English,5 out of 5 stars
1,Hell Divers X: Fallout,"Hell Divers Series, Book 10",By: Nicholas Sansbury Smith,Narrated by: R. C. Bray,"Series: Hell Divers, Book 10",Length: 11 hrs and 32 mins,Release date: 02-28-23,Language: English,5 out of 5 stars
2,A Game of Malice and Greed,Not Found,"By: Caroline Peckham, Susanne Valenti","Narrated by: Bridget Bordeaux, Jake Bordeaux","Series: Game of Malice and Greed, Book 1",Length: 11 hrs and 32 mins,Release date: 02-28-23,Language: English,5 out of 5 stars
3,The New Guy,Not Found,By: Sarina Bowen,"Narrated by: Teddy Hamilton, J.F. Harding",Not Found,Length: 11 hrs and 32 mins,Release date: 02-28-23,Language: English,5 out of 5 stars
4,"Portal to Nova Roma: The Rhine, Book 3",Not Found,By: J.R. Mathews,Narrated by: Christian J. Gilliland,"Series: Portal to Nova Roma, Book 3",Length: 11 hrs and 32 mins,Release date: 02-28-23,Language: English,5 out of 5 stars
...,...,...,...,...,...,...,...,...,...
495,Tú eres lo único que falta en tu vida [You Are...,Libérate del ego a través del Eneagrama [Get R...,By: Borja Vilaseca,"Narrated by: Jordi Llovet, Borja Vilaseca",Not Found,Length: 9 hrs and 41 mins,Release date: 12-20-22,Language: English,5 out of 5 stars
496,While Time Remains,A North Korean Girl's Search for Freedom in Am...,By: Yeonmi Park,Narrated by: Maureen Taylor,Not Found,Length: 9 hrs and 41 mins,Release date: 12-20-22,Language: English,5 out of 5 stars
497,Divorcing a Narcissist,"The Lure, the Loss, and the Law","By: Supriya McKenna, Karin Walker",Narrated by: Supriya McKenna,Not Found,Length: 9 hrs and 41 mins,Release date: 12-20-22,Language: English,5 out of 5 stars
498,Menopause and the Black Woman,A Guide to Wellness for African Americans,By: April S. Lily,Narrated by: Karen M. Jackson,Not Found,Length: 9 hrs and 41 mins,Release date: 12-20-22,Language: English,5 out of 5 stars


In [155]:
df_books.to_csv('audible_books_data.csv', index=False)