#Define parameters

In [0]:
# Destination catalog and schema. The table and views created further down
# are written as {catalog_name}.{schema_name}.<object name>.
catalog_name = "dev_gold"
schema_name = "books"

#Import libraries

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DecimalType

#Average book ratings

In [0]:
table_name = "average_books_rating"

# Minimum number of ratings ("m" in the formula below). The fewer ratings a
# book has compared to this threshold, the more its weighted rating is pulled
# towards the global average.
min_ratings_threshold = 50

# Builds one row per book from books_silver with its rating statistics:
#
#   weighted_rating = v / (v + m) * R  +  m / (v + m) * C
#       v = number_of_ratings         m = min_ratings_threshold
#       R = the book's average rating C = average over all ratings
#
# Notes:
#   * Averages stay unrounded inside the CTEs and are rounded once in the
#     final select, so weighted_rating is not derived from rounded inputs.
#   * global_rating is an aggregate without GROUP BY (exactly one row), so the
#     cross join simply attaches the global average to every book.
#   * Books without any rating keep NULL in average_rating, number_of_ratings
#     and weighted_rating because of the left join.
df = spark.sql(f"""
with global_rating as (
    select
        avg(Book_Rating) as global_average_rating
    from dev_silver.books.ratings_silver
),
average_rating as (
    select
        ISBN,
        avg(Book_Rating) as average_rating,
        count(Book_Rating) as number_of_ratings
    from dev_silver.books.ratings_silver
    group by
        ISBN
)
select
    books.ISBN,
    books.Book_Title,
    books.Book_Author,
    books.Year_Of_Publication,
    books.Publisher,
    books.Image_URL_M,
    round(average_rating.average_rating, 2) as average_rating,
    average_rating.number_of_ratings,
    --weighted rating, see the formula in the notebook comment above this query
    round(
        (average_rating.number_of_ratings
            / (average_rating.number_of_ratings + {min_ratings_threshold}))
            * average_rating.average_rating
        + ({min_ratings_threshold}
            / (average_rating.number_of_ratings + {min_ratings_threshold}))
            * global_rating.global_average_rating,
        2
    ) as weighted_rating
from dev_silver.books.books_silver books
left join average_rating
    on books.ISBN = average_rating.ISBN
cross join global_rating
""")

display(df)

# The table schema is taken from df itself; "inferSchema" is a reader option
# for file sources and has no effect on a write, so it is not set here.
(df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{catalog_name}.{schema_name}.{table_name}")
)
print(f"Table {table_name} updated")


#Top books

In [0]:
view_name = "top_books"

# View with the 10 books that have the highest weighted rating.
# The source table is addressed through the same catalog/schema parameters as
# the view itself, so both always live in the same place.
# Books without ratings have a NULL weighted_rating; Spark sorts NULLs last
# for a descending order, so they only appear if fewer than 10 books are rated.
# number_of_ratings and Book_Title act as tie-breakers to keep the result
# stable when several books share the same weighted rating.
df = spark.sql(f"""
    CREATE OR REPLACE VIEW {catalog_name}.{schema_name}.{view_name} AS
    select
        Book_Title,
        Book_Author,
        weighted_rating,
        number_of_ratings
    from {catalog_name}.{schema_name}.average_books_rating
    order by weighted_rating desc, number_of_ratings desc, Book_Title
    limit 10
""")


# Top authors

In [0]:
view_name = "top_authors"

# View with the 10 authors whose books have the highest average weighted
# rating. avg() ignores NULLs, so unrated books do not affect an author's
# score; authors with no rated book at all get NULL and sort last.
# The source table is addressed through the same catalog/schema parameters as
# the view itself. Book_Author is the tie-breaker for equal scores.
df = spark.sql(f"""
    CREATE OR REPLACE VIEW {catalog_name}.{schema_name}.{view_name} AS
    select
        Book_Author,
        round(avg(weighted_rating), 2) as average_weighted_rating
    from {catalog_name}.{schema_name}.average_books_rating
    group by Book_Author
    order by average_weighted_rating desc, Book_Author
    limit 10
""")
