Spring 2025 <br>
Lecture 03

# Creating Tables & Heatmaps with Great-Tables

Documentation on the **great_tables** package: https://posit-dev.github.io/great-tables/articles/intro.html

In [55]:
# Install the great_tables package / library (ONE-TIME RUN ONLY)
# ! conda install -y great_tables

# Imports
import pandas as pd
from great_tables import (
    GT, md, google_font, style, loc
)

# Read the novels metadata CSV (path is relative to the notebook) into a DataFrame
df = pd.read_csv(filepath_or_buffer='data/top-500-novels-metadata_2025-01-11.csv')

## Examples:

1. Create a table of author C.S. Lewis' books.
    * Include the book title, year of publication, Goodreads number of ratings, and Goodreads average rating
    * Make sure the column names are easily understandable
    * Give the table a title, subtitle, and a footer
    * Sort the table by rating (desc)
2. Create a heatmap for Example 1 using the number of ratings column.

In [56]:
# Data Manipulation
#
# Order of operations: filter rows -> pick columns -> sort -> rename.
# Renaming is saved for last so every earlier step can use the raw column names.

# Show the available column names
print(df.columns)

# Raw columns needed for Example 1, in display order
list_columns_ex_1 = ['title', 'pub_year', 'gr_avg_rating', 'gr_num_ratings']

df_ex_1 = (
    df
    # Keep only the C.S. Lewis rows and only the Example 1 columns
    .loc[lambda d: d['author'] == 'C.S. Lewis', list_columns_ex_1]
    # Highest average rating first (ascending=False -> descending order)
    .sort_values(by='gr_avg_rating', ascending=False)
    # Swap the raw names for reader-friendly labels
    .rename(
        columns={
            'title': 'Book Title',
            'pub_year': 'Publication Year',
            'gr_avg_rating': 'Average Rating',
            'gr_num_ratings': 'Number of Ratings',
        }
    )
    # Note: the row labels from df are kept as-is (the index is not reset here)
)

df_ex_1


Index(['top_500_rank', 'title', 'author', 'pub_year', 'orig_lang', 'genre',
       'author_birth', 'author_death', 'author_gender', 'author_primary_lang',
       'author_nationality', 'author_field_of_activity', 'author_occupation',
       'oclc_holdings', 'oclc_eholdings', 'oclc_total_editions',
       'oclc_holdings_rank', 'oclc_editions_rank', 'gr_avg_rating',
       'gr_num_ratings', 'gr_num_reviews', 'gr_avg_rating_rank',
       'gr_num_ratings_rank', 'oclc_owi', 'author_viaf', 'gr_url', 'wiki_url',
       'pg_eng_url', 'pg_orig_url'],
      dtype='object')


Unnamed: 0,Book Title,Publication Year,Average Rating,Number of Ratings
439,The Screwtape Letters,1942,4.26,443871
50,"The Lion, the Witch, and the Wardrobe",1950,4.24,2780127
160,The Voyage of the Dawn Treader,1952,4.09,467281
123,The Magician's Nephew,1955,4.05,537714
152,The Last Battle,1956,4.01,274476
169,Prince Caspian: The Return to Narnia,1951,3.98,435366
171,The Silver Chair,1953,3.95,295811
158,The Horse and His Boy,1954,3.91,348512


In [57]:
# Table
#
# Builds the Example 1 table from df_ex_1: header, source note, number
# formatting, fonts, font sizes / row padding, and a heatmap on the
# 'Number of Ratings' column.

(
    # Pass GT a frame with a fresh 0..n-1 index (the filtered frame keeps df's row labels)
    GT(df_ex_1.reset_index(drop=True))
    # Add a title and a subtitle (md() renders the Markdown emphasis and <br>)
    .tab_header(
        title = md(
            "**C.S. Lewis' *The Screwtape Letters* is most acclaimed**"
        ),
        subtitle = md(
            'While much more popular with about 6X as many ratings, '
            '*The Lion, the Witch,<br>and the Wardrobe* has about the same '
            'rating as *The Screwtape Letters*.'
        )
    )
    # Left-align the header
    .opt_align_table_header('left')
    # Add a footer
    .tab_source_note(
        source_note='Source: Online Computer Library Center (OCLC)'
    )
    # Format the Number of Ratings column to have commas
    .fmt_integer(columns = 'Number of Ratings')
    # Set the table font. Helvetica is a system font, not a Google Font, so it
    # is passed by name instead of through google_font().
    .opt_table_font(font='Helvetica')
    # Set the title font --> SERIF (Merriweather is loaded from Google Fonts)
    .tab_style(
        style=style.text(font=google_font('Merriweather')),
        locations=loc.title()
    )
    # Set the font sizes
    .tab_options(
        heading_title_font_size='26px',
        heading_subtitle_font_size='16px',
        column_labels_font_size='14px',
        table_font_size = '16px',
        # Row padding: a CSS length string with units, not a bare number
        data_row_padding = '1px'
    )
    # Heatmap
    .data_color(
        columns = 'Number of Ratings',
        palette = 'Blues', # Blues, Greys
        # Range of values mapped onto the palette --> [lowest, highest].
        # 2,800,000 sits just above the largest value in the column (2,780,127).
        domain=[0, 2800000]
    )
)

C.S. Lewis' the Screwtape Letters is most acclaimed,C.S. Lewis' the Screwtape Letters is most acclaimed,C.S. Lewis' the Screwtape Letters is most acclaimed,C.S. Lewis' the Screwtape Letters is most acclaimed
"While much more popular with about 6X as many ratings, The Lion, the Witch, and the Wardrobe has about the same rating as The Screwtape Letters.","While much more popular with about 6X as many ratings, The Lion, the Witch, and the Wardrobe has about the same rating as The Screwtape Letters.","While much more popular with about 6X as many ratings, The Lion, the Witch, and the Wardrobe has about the same rating as The Screwtape Letters.","While much more popular with about 6X as many ratings, The Lion, the Witch, and the Wardrobe has about the same rating as The Screwtape Letters."
Book Title,Publication Year,Average Rating,Number of Ratings
The Screwtape Letters,1942,4.26,443871
"The Lion, the Witch, and the Wardrobe",1950,4.24,2780127
The Voyage of the Dawn Treader,1952,4.09,467281
The Magician's Nephew,1955,4.05,537714
The Last Battle,1956,4.01,274476
Prince Caspian: The Return to Narnia,1951,3.98,435366
The Silver Chair,1953,3.95,295811
The Horse and His Boy,1954,3.91,348512
Source: Online Computer Library Center (OCLC),Source: Online Computer Library Center (OCLC),Source: Online Computer Library Center (OCLC),Source: Online Computer Library Center (OCLC)
