# ------------------------------------------
# Gold Layer Notebook - Business Analytics
# Dataset: Library Borrowing System
# Author: TheDataLead
# ------------------------------------------

In [0]:
from pyspark.sql.functions import count, avg, desc


In [0]:
# Read the two Silver-layer tables that the Gold aggregates in this notebook use.
borrowers_silver = spark.read.table("borrowers_silver")
books_silver = spark.read.table("books_silver")


In [0]:

# 1. Most borrowed books
# Step 1: count the borrow records for each ISBN.
borrow_counts = borrowers_silver.groupBy("book_isbn").agg(
    count("*").alias("borrow_count")
)

# Step 2: attach the book's descriptive columns by matching ISBNs
# (no join type is given, so Spark's default join type applies).
counts_with_books = borrow_counts.join(
    books_silver,
    borrow_counts["book_isbn"] == books_silver["isbn"],
)

# Step 3: keep only the reporting columns, highest borrow count first.
most_borrowed_books = counts_with_books.select(
    "title", "author", "genre", "borrow_count"
).orderBy(desc("borrow_count"))

# Overwrite the Gold Delta table with the freshly computed result.
(
    most_borrowed_books.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("most_borrowed_books")
)

In [0]:
%sql
-- Preview the ten most-borrowed books.
-- The ORDER BY is required: reading a table gives no guaranteed row order,
-- so LIMIT on its own could return any ten rows instead of the top ten.
select * from most_borrowed_books order by borrow_count desc limit 10

In [0]:
# 2. Average return delay per genre
# Step 1: pair every borrow record with its book row so that "genre" is available
# (no join type is given, so Spark's default join type applies).
loans_with_books = borrowers_silver.join(
    books_silver,
    borrowers_silver["book_isbn"] == books_silver["isbn"],
)

# Step 2: average "return_delay_days" within each genre, largest average first.
delay_by_genre = (
    loans_with_books
    .groupBy("genre")
    .agg(avg("return_delay_days").alias("avg_return_delay_days"))
    .orderBy(desc("avg_return_delay_days"))
)

# Overwrite the Gold Delta table with the freshly computed result.
(
    delay_by_genre.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("delay_by_genre")
)

In [0]:
%sql
-- Preview the ten genres with the highest average return delay.
-- The ORDER BY is required: reading a table gives no guaranteed row order,
-- so LIMIT on its own could return any ten rows instead of the top ten.
select * from delay_by_genre order by avg_return_delay_days desc limit 10