In [None]:
import os
import pandas as pd
import kagglehub

# Download and get the dataset path
dataset_path = kagglehub.dataset_download("snehangsude/audible-dataset")

# Find all files in the dataset folder
files = os.listdir(dataset_path)
print("Files in the dataset:", files)

# Find the first CSV file in the dataset
csv_file = [file for file in files if file.endswith(".csv")][0]
csv_path = os.path.join(dataset_path, csv_file)

# Load the CSV file into a Pandas DataFrame
data = pd.read_csv(csv_path)

# Display the first few rows
print("Dataset preview:")
print(data.head())

# Save the DataFrame to a new CSV file
output_path = os.path.join("audio_book.csv")
data.to_csv(output_path, index=False)
print(f"Saved as: {output_path}")


Files in the dataset: ['audio_book.csv', 'audible_uncleaned.csv', 'audible_cleaned.csv']
Dataset preview:
                                         name                     author  \
0                  Geronimo Stilton #11 & #12  Writtenby:GeronimoStilton   
1                            The Burning Maze      Writtenby:RickRiordan   
2                                The Deep End       Writtenby:JeffKinney   
3                        Daughter of the Deep      Writtenby:RickRiordan   
4  The Lightning Thief: Percy Jackson, Book 1      Writtenby:RickRiordan   

                    narrator                time releasedate language  \
0      Narratedby:BillLobely   2 hrs and 20 mins    04-08-08  English   
1   Narratedby:RobbieDaymond   13 hrs and 8 mins    01-05-18  English   
2      Narratedby:DanRussell    2 hrs and 3 mins    06-11-20  English   
3  Narratedby:SoneelaNankani  11 hrs and 16 mins    05-10-21  English   
4  Narratedby:JesseBernstein              10 hrs    13-01-10  English   

In [10]:
import streamlit as st
import pandas as pd
import urllib.parse

# mock dataset
data = [
    {
        "title": "Geronimo Stilton #11 & #12",
        "author": "Geronimo Stilton",
        "narrator": "Bill Lobely",
        "time": "2 hrs and 20 mins",
        "releasedate": "04-08-08",
        "language": "English",
        "stars": 5.0,
        "votes": 34,
        "price": 468.0,
    },
    {
        "title": "The Burning Maze",
        "author": "Rick Riordan",
        "narrator": "Robbie Daymond",
        "time": "13 hrs and 8 mins",
        "releasedate": "01-05-18",
        "language": "English",
        "stars": 4.5,
        "votes": 41,
        "price": 820.0,
    },
    # Add more entries here...
]

df = pd.DataFrame(data)

# utility to generate Audible link
def generate_audible_link(title):
    base_url = "https://www.audible.in/search"
    query = urllib.parse.urlencode({"keywords": title, "k": title})
    return f"{base_url}?{query}"

# streamlit UI
st.title("Audible Dataset Explorer")

# Filters
st.sidebar.header("Filters")
min_price, max_price = st.sidebar.slider("Price Range", 0, int(df["price"].max()), (0, int(df["price"].max())))
min_votes = st.sidebar.slider("Minimum Votes", 0, int(df["votes"].max()), 0)
min_stars = st.sidebar.slider("Minimum Stars", 0.0, 5.0, 0.0, step=0.5)

# Apply filters
filtered_df = df[
    (df["price"] >= min_price) &
    (df["price"] <= max_price) &
    (df["votes"] >= min_votes) &
    (df["stars"] >= min_stars)
]

# Sorting
sort_by = st.sidebar.selectbox("Sort By", ["stars", "votes", "price"])
sort_order = st.sidebar.radio("Sort Order", ["Ascending", "Descending"])
filtered_df = filtered_df.sort_values(by=sort_by, ascending=(sort_order == "Ascending"))

# Display Data
st.write(f"Showing {len(filtered_df)} results:")
for _, row in filtered_df.iterrows():
    st.markdown(f"### {row['title']}")
    st.write(f"**Author:** {row['author']}")
    st.write(f"**Narrator:** {row['narrator']}")
    st.write(f"**Time:** {row['time']}")
    st.write(f"**Release Date:** {row['releasedate']}")
    st.write(f"**Language:** {row['language']}")
    st.write(f"**Stars:** {row['stars']}")
    st.write(f"**Votes:** {row['votes']}")
    st.write(f"**Price:** ₹{row['price']}")
    st.markdown(f"[Search on Audible]({generate_audible_link(row['title'])})")
    st.markdown("---")


2025-05-09 02:52:57.747 
  command:

    streamlit run /home/arch/.venvs/global/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-05-09 02:52:57.805 Session state does not function when running a script without `streamlit run`


In [3]:
import re

# Create DataFrame
df = pd.DataFrame(data)

# Extract numerical ratings and votes
def parse_stars_and_votes(stars):
    rating_match = re.search(r"([\d.]+) out of 5 stars", stars)
    votes_match = re.search(r"(\d+) ratings", stars)
    rating = float(rating_match.group(1)) if rating_match else None
    votes = int(votes_match.group(1)) if votes_match else 0
    return rating, votes

df[["rating", "votes"]] = df["stars"].apply(parse_stars_and_votes).apply(pd.Series)

# Set threshold and compute weighted score
m = 30  # Minimum votes threshold
c = df["rating"].mean()  # Mean rating across all items

df["weighted_score"] = ((df["votes"] * df["rating"]) + (m * c)) / (df["votes"] + m)

# Sort by weighted score
df = df.sort_values(by="weighted_score", ascending=False)

# Filter items below threshold votes
threshold = 10
df = df[df["votes"] >= threshold]

# Display results
print("Ranked Items:")
print(df[["name", "rating", "votes", "weighted_score"]])



Ranked Items:
                                                  name  rating  votes  \
44433                                Project Hail Mary     5.0  926.0   
7559   Harry Potter and the Chamber of Secrets, Book 2     5.0  845.0   
48143       Sherlock Holmes: The Definitive Collection     5.0  838.0   
64428                                    Wings of Fire     5.0  827.0   
22158                                    Wings of Fire     5.0  823.0   
...                                                ...     ...    ...   
66887                                         Hinduism     3.0   24.0   
16691                                Unf--k Your Brain     3.5   85.0   
19433                                       Just Do It     3.5  112.0   
60826                                The Sands of Time     3.5  134.0   
12490                    Learn English: Word Power 101     3.0   82.0   

       weighted_score  
44433        4.982958  
7559         4.981381  
48143        4.981231  
64428        