In [0]:
# ——————————————————————————————
#  ENVIRONMENT CONFIGURATION
# ——————————————————————————————
# Read the target environment from the "pipeline.env" notebook widget.
# This lookup had been commented out, leaving `env` unassigned in this cell, so
# every line below raised NameError unless `env` happened to exist already in
# the session.
env = dbutils.widgets.get("pipeline.env")
print(f"Simple Gold Analytics for environment: {env}")

# Namespaces used by the cells below: views are created in the gold schema and
# read from the silver schema of the same environment.
catalog = "book_rec_catalog"
silver_schema = f"{env}_silver"
gold_schema = f"{env}_gold"

# Make the gold schema the session default.
spark.sql(f"USE CATALOG {catalog}")
spark.sql(f"USE SCHEMA {gold_schema}")

print(f"Catalog: {catalog}")
print(f"Silver Schema: {silver_schema}")
print(f"Gold Schema: {gold_schema}")

Top 10 books

In [0]:
# Gold view: the ten book groups (title, author, publisher, year) with the
# highest popularity score, where the score is rating count times average
# rating. Groups with fewer than 5 non-null ratings are excluded.
top_books_view_sql = f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.v_top_10_books AS
SELECT 
    b.`Book-Title`,
    b.`Book-Author`, 
    b.`Publisher`,
    b.`Year-Of-Publication`,
    COUNT(r.`Book-Rating`) as total_ratings,
    AVG(r.`Book-Rating`) as avg_rating,
    COUNT(r.`Book-Rating`) * AVG(r.`Book-Rating`) as popularity_score
FROM {catalog}.{silver_schema}.books_silver_batch b
JOIN {catalog}.{silver_schema}.ratings_silver_batch r ON b.ISBN = r.ISBN
WHERE r.`Book-Rating` IS NOT NULL
GROUP BY b.`Book-Title`, b.`Book-Author`, b.`Publisher`, b.`Year-Of-Publication`
HAVING COUNT(r.`Book-Rating`) >= 5
ORDER BY popularity_score DESC
LIMIT 10
"""

# Execute the DDL, then confirm on stdout.
spark.sql(top_books_view_sql)
print("View v_top_10_books created successfully!")

Top 10 authors

In [0]:
# Gold view: the ten authors with the highest average rating, restricted to
# authors with at least 10 non-null ratings. Also reports how many ratings and
# how many distinct ISBNs contributed to each author's average.
spark.sql(f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.v_top_10_authors AS
SELECT 
    b.`Book-Author`,
    COUNT(r.`Book-Rating`) as total_ratings,
    AVG(r.`Book-Rating`) as avg_rating,
    COUNT(DISTINCT b.ISBN) as books_count
FROM {catalog}.{silver_schema}.books_silver_batch b
JOIN {catalog}.{silver_schema}.ratings_silver_batch r ON b.ISBN = r.ISBN
WHERE r.`Book-Rating` IS NOT NULL
GROUP BY b.`Book-Author`
HAVING COUNT(r.`Book-Rating`) >= 10
ORDER BY avg_rating DESC
LIMIT 10
""")

# Confirmation message, matching the other view-creation cells in this notebook.
print("View v_top_10_authors created successfully!")

Top authors by year

In [0]:
# Gold view: the ten (year, author, publisher) groups with the highest average
# rating among books published from 2014 through 2024, keeping only groups with
# at least 3 non-null ratings. Note the LIMIT applies to the whole result, not
# per year.
#
# The source tables are resolved through {catalog}.{silver_schema} so the view
# reads the silver data of the environment being deployed; they were previously
# hardcoded to book_rec_catalog.dev_silver, which made every environment's gold
# view depend on dev data.
spark.sql(f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.v_authors_by_year AS
SELECT
  b.`Year-Of-Publication`,
  b.`Book-Author`,
  b.`Publisher`,
  COUNT(r.`Book-Rating`) as total_ratings,
  AVG(r.`Book-Rating`) as avg_rating,
  COUNT(DISTINCT b.ISBN) as books_count
FROM
  {catalog}.{silver_schema}.books_silver_batch b
    JOIN {catalog}.{silver_schema}.ratings_silver_batch r
      ON b.ISBN = r.ISBN
WHERE
  r.`Book-Rating` IS NOT NULL
  AND b.`Year-Of-Publication` >= 2014
  AND b.`Year-Of-Publication` <= 2024
GROUP BY
  b.`Year-Of-Publication`,
  b.`Book-Author`,
  b.`Publisher`
HAVING
  COUNT(r.`Book-Rating`) >= 3
ORDER BY
  avg_rating DESC
LIMIT 10
""")

# The message now names the view that is actually created above.
print("View v_authors_by_year created successfully!")