In [None]:
# -- Log into Audible and use this browser extension to export your wishlist and/or library
# https://chromewebstore.google.com/detail/audible-library-extractor/deifcolkciolkllaikijldnjeloeaall

# -- Purpose 
# This tool will allow you to prioritize your list of books based on a variety of factors
    # e.g. length, price, average rating, number of ratings, & value (price/length)

# -- Use Cases (particularly if you have a large wishlist or library)
# Choosing what book to purchase next (from your wishlist)
    # e.g. If purchasing a book (especially if there is a sale), you can prioritize the best deals without having to open each individually in-app 
        # try: high priority on low price and/or good value
    # e.g. If purchasing a book using a credit, you can prioritize the most expensive book to get the best "bang for your buck"
        # try: high priority on high price

# Choosing the next book to listen to (from your library)
    # e.g. If you want a book that is shorter or longer, and/or highly rated or highly popular
        # try: high priority on short (or long!), highly rated, highly popular (great if you're indecisive!)

# -- Questions to answer before analysis
# Every setting below is read by the validation, weighting and scoring steps further down.
# The five *_priority values are relative: each one is divided by the sum of the priorities
# that apply to the chosen analysis type, so only their proportions matter.

# Are you analyzing your library or wishlist? What is the path of the file?
analysis_type = 'wishlist'      # define either 'library' or 'wishlist'
file_path = r"C:\Users\User\Documents\Data\Audible\audible_wishlist_raw.csv"     # define the file path of your export
# example paths:
# wishlist - "C:\Users\User\Documents\Data\Audible\audible_wishlist_raw.csv"
# library - "C:\Users\User\Documents\Data\Audible\audible_library_raw.csv"

# Are you in the mood for a shorter or longer book? From 0 to 10, how important is book length to you?
length_preference = 'short'     # define either 'short' or 'long'
length_priority = 5             # define number from 0 to 10

# Do you only want to see books above a certain rating? From 0 to 10, how important is a high book rating to you?
rating_minimum = 4.0              # exclude all books with average rating below this number (ratings can range from 0 to 5. set 0 to show all)
rating_priority = 5             # define number from 0 to 10

# Do you only want to see books above a certain number of ratings? From 0 to 10, how important is the number of ratings to you?
number_ratings_minimum = 10    # exclude all books with fewer ratings than this number (set 0 to show all)
number_ratings_priority = 5     # define number from 0 to 10

# The following variables are related to price, which is a wishlist-only field, and will be ignored when analyzing library.
# Would you prefer price to be higher or lower? From 0 to 10, how important is this to you?
    # If using credits, you may prefer a higher price for the best deal. If shopping a sale, you may prefer lower. This utilizes sale price!
price_preference = 'low'        # please define either 'low' or 'high'
price_priority = 1              # define number from 0 to 10

# How important is it to get the most value for your money? That is to say - the lowest cost per hour.
    # Since this is calculated using both Length and Price, be careful of "doubling up" on these priorities!
value_priority = 3              # define number from 0 to 10

# -------------------------------------------------------------------------------------------------
# Quick validation
# Fail fast with a clear message before any weights are calculated.
if analysis_type not in ['library', 'wishlist']:
    raise ValueError("Analysis type must be either 'library' or 'wishlist'.")
if length_preference not in ['long', 'short']:
    raise ValueError("Length preference must be either 'long' or 'short'.")
if price_preference not in ['high', 'low']:
    raise ValueError("Price preference must be either 'high' or 'low'.")
# any() rather than all(): a single negative setting is enough to reject the inputs
if any(x < 0 for x in [length_priority, rating_priority, rating_minimum, number_ratings_priority,
                       number_ratings_minimum, price_priority, value_priority]):
    raise ValueError("Negative values are not allowed.")

# Check that the export can be read. Problems are printed rather than raised,
# so the review of variables and weights below is still shown.
try:
    import pandas as pd
    df = pd.read_csv(file_path)
    print("File ready for analysis 🚀")
except FileNotFoundError:
    print("Error: File not found. Please check the file path.")
except Exception as e:
    print(f"An error occurred: {e}")

# Quick calculations
# Turn the 0-10 priorities into weights: each weight is the priority's share of the
# total of the priorities that apply to the chosen analysis type, rounded to 3 decimals.
applicable_priorities = [length_priority, rating_priority, number_ratings_priority]
if analysis_type == 'wishlist':
    # price and value are wishlist-only factors, so they only count towards the wishlist total
    applicable_priorities += [price_priority, value_priority]
priority_total = sum(applicable_priorities)

# without this guard an all-zero set of priorities ends in a bare ZeroDivisionError
if priority_total <= 0:
    raise ValueError("At least one priority must be greater than 0.")

length_weight = round(length_priority / priority_total, 3)
rating_weight = round(rating_priority / priority_total, 3)
num_ratings_weight = round(number_ratings_priority / priority_total, 3)

if analysis_type == 'wishlist':
    wishlist_total = priority_total
    price_weight = round(price_priority / priority_total, 3)
    value_weight = round(value_priority / priority_total, 3)
elif analysis_type == 'library':
    library_total = priority_total
    # price-related factors do not take part in the library score
    price_weight = 0
    value_weight = 0
# -------------------------------------------------------------------------------------------------
# Review of inputs for calculation:
# Echo the chosen settings and the resulting weights so they can be adjusted before scoring.
print("")
print("REVIEW YOUR VARIABLES AND ADJUST IF NEEDED: ")
print("Analysis Type: ", analysis_type)
print("Length Preference: ", length_preference)
print("Average Rating Minimum: ", rating_minimum)
print("Minimum Number of Ratings: ", number_ratings_minimum)
print("Price Preference (Wishlist Only!): ", price_preference)
print("")
print("REVIEW YOUR WEIGHTS AND ADJUST PRIORITIES IF NEEDED: ")
# The '.1%' format scales by 100 and fixes one decimal, which avoids float
# artifacts such as 7.000000000000001% that `weight * 100` can print.
print(f"Length Weight: {length_weight:.1%}")
print(f"Rating Weight: {rating_weight:.1%}")
print(f"Number of Ratings Weight: {num_ratings_weight:.1%}")
print(f"Price Weight (Wishlist Only!): {price_weight:.1%}")
print(f"Value Weight (Wishlist Only!): {value_weight:.1%}")

In [None]:
# -- dependencies
import pandas as pd
from scipy.stats import rankdata
import re

# -- reading csv, creating dataframe, & cleaning the data for analysis

df = pd.read_csv(file_path)

# only the fields we need; .copy() gives an independent frame so the column
# assignments that follow do not act on a selection of the raw export
needed_columns = ['Title Short', 'Length', 'Rating', 'Ratings', 'Format', 'Unavailable']
if analysis_type == 'wishlist':
    df = df[needed_columns + ['Price']].copy()   # 'Price' is a wishlist-only field
    df['Price'] = df['Price'].fillna(0)   # if free or in the premium catalog, there will not be a price, so we set to 0
elif analysis_type == 'library':
    df = df[needed_columns].copy()

# drop records where format is podcast. only really needed for library list (as podcasts aren't wishlisted/purchased)
df = df[df['Format'] != 'Podcast']

# filters to only include available records, then drops the column
# read_csv may load TRUE/FALSE cells as booleans instead of text, in which case a
# comparison against the string 'TRUE' never matches; comparing the upper-cased
# text form handles both representations
is_unavailable = df['Unavailable'].astype(str).str.strip().str.upper() == 'TRUE'
df = df[~is_unavailable].drop(columns=['Unavailable'])

# drop records below defined rating and ratings minimums
# (rows with a missing Rating or Ratings value fail the comparison and are dropped too)
meets_minimums = (df['Rating'] >= rating_minimum) & (df['Ratings'] >= number_ratings_minimum)
df = df[meets_minimums].copy()

# defining function to calc total length in minutes from the 'Length' column formatted as e.g. '12h 34m', which is a recognized format
def calc_length_minutes(time_str):
    """Convert a length string such as '12h 34m' into a total number of minutes.

    Accepts an hours part, a minutes part, or both ('12h 34m', '12h', '34m').

    Args:
        time_str: The raw length value. Anything that is not a string
            (for example a missing cell) is treated as unparseable.

    Returns:
        The total length in minutes as an int, or 0 when no hours or
        minutes part can be read from the start of the value.
    """
    # missing cells are not strings, and re.match would raise TypeError on them
    if not isinstance(time_str, str):
        return 0

    # both parts are optional so that lengths under an hour ('34m') are not read as 0;
    # the pattern therefore always matches and the groups tell us what was found
    match = re.match(r'\s*(?:(\d+)\s*h)?\s*(?:(\d+)\s*m)?', time_str)
    hours, minutes = match.group(1), match.group(2)
    if hours is None and minutes is None:
        return 0  # set to 0 if format doesn't match

    return int(hours or 0) * 60 + int(minutes or 0)

# transform length to minutes as a number instead of as a string (for further analysis)
df['Length Mins'] = df['Length'].apply(calc_length_minutes)

clean_df = df

# -- calculate the new prioritized score

# calculate percentiles for columns to get their respective position in the data
# can change average to min or max to change how percentiles are assigned to tied records
record_count = len(clean_df)

length_rank_share = rankdata(clean_df['Length Mins'], method='average') / record_count
if length_preference == 'long':
    clean_df['Length Perc'] = length_rank_share        # longest length = highest percentile
elif length_preference == 'short':
    clean_df['Length Perc'] = 1 - length_rank_share    # shortest length = highest percentile

clean_df['Rating Perc'] = rankdata(clean_df['Rating'], method='average') / record_count             # highest rating = highest percentile
clean_df['Num Ratings Perc'] = rankdata(clean_df['Ratings'], method='average') / record_count       # most ratings = highest percentile

# weighted factors shared by both analysis types; the wishlist adds price and value below
priority_score = (
    length_weight * clean_df['Length Perc'] +
    rating_weight * clean_df['Rating Perc'] +
    num_ratings_weight * clean_df['Num Ratings Perc'] )
detail_columns = ['Title Short', 'Length', 'Rating', 'Ratings']
perc_columns = ['Length Perc', 'Rating Perc', 'Num Ratings Perc']

if analysis_type == 'wishlist':          # includes variables related to price
    price_rank_share = rankdata(clean_df['Price'], method='average') / record_count
    if price_preference == 'high':
        clean_df['Price Perc'] = price_rank_share        # highest price = highest percentile
    elif price_preference == 'low':
        clean_df['Price Perc'] = 1 - price_rank_share    # lowest price = highest percentile

    # calculating price per hour
    # a priced title with a length of 0 minutes divides to inf, which ranks as the worst value;
    # a free title with a length of 0 minutes divides to NaN (0/0), which is set to 0 because
    # a NaN can propagate through rankdata and blank out every Value Perc
    price_per_hour = (clean_df['Price'] / (clean_df['Length Mins'] / 60)).round(2)
    clean_df['Price per Hour'] = price_per_hour.fillna(0)
    clean_df['Value Perc'] = 1 - (rankdata(clean_df['Price per Hour'], method='average') / record_count)  # lowest price/hour = highest percentile

    priority_score = (
        priority_score +
        price_weight * clean_df['Price Perc'] +
        value_weight * clean_df['Value Perc'] )
    detail_columns += ['Price', 'Price per Hour']
    perc_columns += ['Price Perc', 'Value Perc']

if analysis_type in ('wishlist', 'library'):
    clean_df['Priority Score'] = priority_score      # the priority score based on the weighted factors
    clean_df = clean_df[detail_columns + ['Priority Score'] + perc_columns + ['Format']].copy()  # reorder
    columns_to_round = ['Priority Score'] + perc_columns
    clean_df[columns_to_round] = clean_df[columns_to_round].round(3)   # rounding these selected columns to 3 decimals

# sort by priority score desc
prioritized_df = clean_df.sort_values(by=['Priority Score'], ascending=[False])


prioritized_df.reset_index(drop=True, inplace=True)

# print(prioritized_df)
prioritized_df

In [None]:
# -- write the prioritized list to a csv file at the path below
csv_export = r'C:\Users\User\Downloads\test.csv'

try:
    # index=False leaves the row index out of the file
    prioritized_df.to_csv(csv_export, index=False)
    print(f"Data successfully exported to {csv_export} 🎉")
except Exception as export_error:
    # report the problem instead of raising it
    print(f"An error occurred: {export_error}")
