In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import sparse
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import Normalizer
from IPython.display import display
from scipy.sparse.linalg import svds

### 0.1 - Load and Prepare Data

In [2]:
# Load df_books and genres
wd = os.getcwd()
df_books = pd.read_csv(wd + "/data/goodreads_books.csv")
df_inferred_genres = pd.read_csv(wd + "/data/inferred_genres.csv")
genre_descriptors = pd.read_csv(wd + "/data/inferred_genre_top_words.csv")

# Join genres to books
df_books = pd.merge(df_books, df_inferred_genres.drop(columns="title"), how="left", on="book_id")

In [3]:
# Load sparse_reviews from file
df_reviews = sparse.load_npz(wd + "/data/user_reviews.npz")

# Load user (rows) and book (cols) indices
user_index = pd.read_csv(wd + "/data/user_index_for_sparse_matrix.csv").rename(columns={"0":"user_id"})
book_index = pd.read_csv(wd + "/data/book_index_for_sparse_matrix.csv").rename(columns={"0":"book_id"})

In [4]:
# Load my goodreads history
my_books = pd.read_csv(wd + "/data/goodreads_library_export.csv")

# Filter to books I've rated
my_books = my_books[["Book Id", "My Rating", "Title"]]\
    .rename(columns={"Book Id":"book_id", "My Rating":"rating", "Title":"title"}).query("rating > 0")
print(f"Target reader ratings: {my_books.shape[0]}")

# Reformat to fit sparse matrix
my_books_1 = pd.merge(
                my_books, df_books[["book_id", "title"]], 
                how="left", on="book_id", suffixes=["_mb","_dfb"]
            )
my_books_id_match = my_books_1[~my_books_1["title_dfb"].isna()]
print(f"Matching on book_id: {my_books_id_match.shape[0]}")

my_books_2 = pd.merge(
    my_books_1[my_books_1["title_dfb"].isna()], df_books[["book_id", "title"]], 
    how="left", left_on="title_mb", right_on="title", suffixes=["_mb","_dfb"]
    )
my_books_match_on_title = my_books_2[~my_books_2["title"].isna()]
print(f"Matching on title: {my_books_match_on_title.shape[0]}")

my_books_id_match = my_books_id_match[["book_id", "rating", "title_dfb"]]\
                        .rename(columns={"title_dfb":"title"})
my_books_match_on_title = my_books_match_on_title[["book_id_dfb", "rating", "title"]]\
                        .rename(columns={"book_id_dfb":"book_id"})

my_books_3 = pd.concat([my_books_id_match, my_books_match_on_title])
my_books_3 = my_books_3.drop_duplicates(subset="book_id")
print(f"Total matches: {my_books_3.shape[0]}, {(my_books_3.shape[0] / my_books.shape[0]):.2%} of total")

my_books_4 = pd.merge(book_index, my_books_3, how="left", on="book_id")#.fillna(0.)
my_books = np.array(my_books_4["rating"]).reshape(1,-1)

# Add to sparse matrix
df_reviews = sparse.vstack([df_reviews, my_books])

# # Normalize reviews within users
# norm = Normalizer()
# df_reviews = norm.fit_transform(df_reviews) 

Target reader ratings: 166
Matching on book_id: 30
Matching on title: 65
Total matches: 95, 57.23% of total


In [5]:
# Set target reader to make recommendations for
target = df_reviews.shape[0] - 1

#### 0.2 - Functions to Process Recommendations

In [6]:
# Function to use KNN to find similar users
def find_neighbors(n_neighbors, target_user, ratings_df, ind_books, books):

    # Instantiate KNN
    n_neighbors = n_neighbors

    nn_model = NearestNeighbors(
        metric="cosine",
        algorithm="auto",
        n_neighbors=n_neighbors,
        n_jobs=-1
    )

    # Fit to sparse matrix
    nn_model.fit(ratings_df)

    # Feed in user and get neighbors and distances
    reader = ratings_df[target,:].toarray()
    dists, neighbors = nn_model.kneighbors(reader, return_distance=True)

    similar_users = pd.DataFrame(
        [pd.Series(neighbors.reshape(-1)), pd.Series(dists.reshape(-1))]).T.rename(
            columns={0:"user", 1:"distance"}
    )

    # Get all books read by similar users
    book_ind = []
    book_rat = []
    uid = []
    target_user_books = []
    target_user_book_rat = []
    for nt in similar_users.itertuples():
        user = ratings_df[int(nt.user),:].toarray()
        book_inds = np.where(user[0] > 0)[0]
        ratings = user[0][np.where(user[0] > 0)[0]]
        for i in range(len(book_inds)):        
            book_ind.append(book_inds[i])
            book_rat.append(ratings[i])
            uid.append(nt.user)    
            if nt.distance < 0.000000001:
                target_user_books.append(book_inds[i])
                target_user_book_rat.append(ratings[i])

    neighbor_user_ratings = pd.DataFrame([uid, book_ind, book_rat]).T.rename(
                                columns={0:"uid",1:"book_index",2:"user_rating"}
                            )

    # Join overall rating for each book
    neighbor_user_ratings = pd.merge(
                                ind_books.reset_index(), neighbor_user_ratings, 
                                how="inner", left_on="index", right_on="book_index"
                            )
    neighbor_user_ratings = pd.merge(neighbor_user_ratings, books, how="inner", on="book_id")

    # Filter out books target reader has already read
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["book_index"].isin(target_user_books)]
    neighbor_user_ratings.drop(["index"], axis=1, inplace=True)

    # Filter out later volumes in series using regex pattern
    regex1 = r"#([3-9]|[1-9]\d+)"
    regex2 = r"Vol. ([0-9]|[1-9]\d+)"
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains("#1-")]

    # View target user's ratings
    target_user_ratings = pd.DataFrame(
                            [target_user_books, target_user_book_rat]).T.rename(
                                columns={0:"book_index",1:"user_rating"}
                        )
    target_user_ratings = pd.merge(
                            ind_books.reset_index(), target_user_ratings,
                            how="inner", left_on="index", right_on="book_index"
                        )
    target_user_ratings = pd.merge(target_user_ratings, books, how="inner", on="book_id")    

    return neighbor_user_ratings, target_user_ratings

In [7]:
# Function to show top recs for each genre
def show_top_recs_by_genre(
        target, others, preds=None, how=["KNN", "MF"], whose_genres=["target", "neighbors"], 
        n_genres=5, min_score=3.9, min_ratings=400, n_recs=15, write_to_excel=False):

    if whose_genres == "target":
        genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    else:
        genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))    
    
    genres = []
    results = []
    # Loop through genres in descending relevance order and print top recs
    for genre in genre_ranking.index[0:n_genres]:        
        
        g = float(genre[6:])
        if how == "KNN":
            highest_rated_recs_genre = others.query("main_genre == @g")\
                .groupby(["title", "avg_rating", "ratings_count", "year", "url"])["book_id"]\
                .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)
        
        elif how == "MF":
            highest_rated_recs_genre = preds.query("main_genre == @g")\
            .sort_values(by="predicted_rating", ascending=False)                

        highest_rated_recs_genre = highest_rated_recs_genre.query(
                                        "ratings_count > @min_ratings & avg_rating > @min_score"
                                    )

        # Print genre name and descriptor
        for nt in genre_descriptors.itertuples():
            genre_rep = genre.replace("_"," ")
            if nt.genre_string[0:len(f"{genre}:")] == f"{genre_rep}:":
                gs = (nt.genre_string)
        
        genres.append(gs)
        results.append(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(50))

        print(gs)
        display(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(n_recs))
        print("_____\n")
    
    # Write to excel if keyword set
    path = (wd + "/data/book_recs.xlsx")
    if write_to_excel == True:
        if os.path.isfile(path):
            with pd.ExcelWriter(path, engine="openpyxl", mode="a", if_sheet_exists='replace') as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        else:
            with pd.ExcelWriter(path) as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        writer.close

In [8]:
# Functions to show top rated and most popular among similar readers
def neighbors_most_popular(others, n=10, min_ratings=1000, min_score=3.8):
    
    popular_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["book_id", "avg_rating"], ascending=False)\
        .nlargest(n, "book_id").rename(columns={"book_id":"percent_similar_users_read"})

    popular_recs["percent_similar_users_read"] = (popular_recs["percent_similar_users_read"] / 
                                                    others["uid"].nunique()).map('{:.1%}'.format)
    return popular_recs[["title","avg_rating","ratings_count","year","percent_similar_users_read","url"]]


def neighbors_top_rated(others, n=10, min_ratings=1000, min_score=3.8):

    highest_rated_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)\
        .nlargest(n, "avg_rating").drop(columns="book_id")

    return highest_rated_recs

In [9]:
# Function to plot neighbors' and target's top genres
def plot_top_genres(others, target):
    
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12,10))
    neighbor_genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    target_genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    
    sns.barplot(
        data=neighbor_genre_ranking, y=neighbor_genre_ranking.index, x=0, ax=ax[0]
    )
    ax[0].set_xlabel("Genre preference")
    ax[0].set_title("Neighbors' Top Genres")

    # Plot target reader's top genres    
    sns.barplot(
        data=target_genre_ranking, y=target_genre_ranking.index, x=0, ax=ax[1]
    )
    ax[1].set_xlabel("Genre preference")
    ax[1].set_title("Target User's Top Genres")
    
    plt.show()

## TESTING using gradient descent for matrix factorization 

In [27]:
# Find larger number of similar users than before to get broad pool of potentially relevant books
df_reviews.data[np.isnan(df_reviews.data)] = 0
df_reviews = sparse.csr_matrix(df_reviews)
large_neighborhood_ratings, target_user_ratings = find_neighbors(2000, target, df_reviews, book_index, df_books)

# Get unique users and books to slice df_reviews
neighbor_index = large_neighborhood_ratings["uid"].unique()
neighbor_index = np.append(neighbor_index, target)
neighbor_book_index = large_neighborhood_ratings["book_index"].unique()
neighbor_book_index = np.append(neighbor_book_index, target_user_ratings["book_index"].unique())

# Slice df_reviews to make User Ratings Matrix
df_reviews.data[df_reviews.data == 0] = np.nan
df_reviews = sparse.csr_matrix(df_reviews)
R = df_reviews[:, neighbor_book_index]
R = R[neighbor_index, :].toarray()

  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]


In [19]:
def matrix_factorization1(R, k=5, steps=4000, lr=0.0002, beta=0.02, verbose=0):
    """
    Uses gradient descent to train matrix factorization algorithm. Useful explanation
    of the math here: 
    http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

    Args:
        R: Utility matrix of user item ratings         
        k: Number of latent features
        steps: Number of epochs to run training
        lr: Learning rate
        beta: Regularization strength
        verbose: Prints updates if 1

    Returns:
        Predictions, User Embeddings, Item Embdeddings
    """
    # Initialize random values for user (P) and book (Q) embedding matrices
    P = np.random.rand(R.shape[0],K)
    Q = np.random.rand(R.shape[1],K).T
    
    for step in range(steps): # For each steps
        r_0 = np.where(R > 0) # Find indices where rating > 0
        e = 0 # Initialize error for step to 0
        for n in range(len(r_0[0])): # For each value where rating > 0
            i = r_0[0][n] # Get the row index where the value is
            j = r_0[1][n] # Get the column index where the value is
            resid = R[i][j] - np.dot(P[i,:],Q[:,j]) # residual is value - r_hat
            e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2) # Add to error
            for k in range(K): # for each component (latent feature)       
                P[i][k] = P[i][k] + lr * (2 * resid * Q[k][j] - beta * P[i][k]) # Update P by step in gradient
                Q[k][j] = Q[k][j] + lr * (2 * resid * P[i][k] - beta * Q[k][j]) # Update Q by step in gradient
                e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2)) # Update error with regularized term

        # View progress
        if verbose == 1:
            if step % (steps / 10) == 0:
                print(f"Step: {step}, error: {e}")
        
        # Break loop if error is very small
        if e < 0.001:
            break
    
    # Get predictions
    preds = np.dot(P, Q)

    return preds, P, Q.T

In [20]:
preds, user_embeddings, item_embeddings = matrix_factorization1(R, P, Q, K, verbose=1)

Step: 0, error: 28.36597660146878
Step: 400, error: 23.20761081929307
Step: 800, error: 22.248528620033344
Step: 1200, error: 21.7960188515145
Step: 1600, error: 21.54634453791035
Step: 2000, error: 21.383865567875716
Step: 2400, error: 21.262042845496605
Step: 2800, error: 21.16087796283709
Step: 3200, error: 21.071090164857427
Step: 3600, error: 20.987981590462564


In [21]:
def matrix_factorization2(R, k=5, steps=3000, lr=0.0003, beta=0.02, verbose=0):
    """
    Uses gradient descent to train matrix factorization algorithm. Useful explanation
    of the math here: 
    http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

    Args:
        R: Utility matrix of user item ratings         
        k: Number of latent features
        steps: Number of epochs to run training
        lr: Learning rate
        beta: Regularization strength
        verbose: Prints updates if 1

    Returns:
        Predictions, User Embeddings, Item Embdeddings
    """

    # Initialize random values for user (P) and book (Q) embedding matrices
    K = k
    P = np.random.rand(R.shape[0],k)
    Q = np.random.rand(R.shape[1],k).T

    for step in range(steps): # For each steps
        r_0 = np.where(R > 0) # Find indices where rating > 0
        e = 0 # Initialize error for step to 0
        i = r_0[0]
        j = r_0[1]
        resid = R[i,j] - (P @ Q)[i,j] # residual is value - r_hat
        e = e + np.sum(pow(resid, 2)) # Add to error
        for k in range(K): # for each component (latent feature)     
            P[i,k] = P[i,k] + lr * (2 * resid * Q[k,j] - beta * P[i,k]) # Update P by step in gradient                             
            Q[k,j] = Q[k,j] + lr * (2 * resid * P[i,k] - beta * Q[k,j]) # Update Q by step in gradient
            e = e + (beta/2) * np.sum(pow(P[i,k],2) + pow(Q[k,j],2)) # Update error with regularized term

        # View progress
        if verbose == 1:            
            if step % (steps / 10) == 0:
                print(f"Step: {int(step + (steps / 10))}, error: {e}")
        
        # Break loop if error is very small
        if e < 0.001:
            break
    
    # Get predictions
    preds = np.dot(P, Q)

    return preds, P, Q.T

In [28]:
preds, user_embeddings, item_embeddings = matrix_factorization2(R, k=25, steps=10000, verbose=1)

Step: 500, error: 26029.505598147378
Step: 1000, error: 6391.664854904534
Step: 1500, error: 6228.053353954057
Step: 2000, error: 6214.2535475709155
Step: 2500, error: 6181.969777749042
Step: 3000, error: 6188.37575116693
Step: 3500, error: 6201.24191508516
Step: 4000, error: 6209.541144943915
Step: 4500, error: 6213.385105209378
Step: 5000, error: 6214.624552651341


In [31]:
# Make dataframe from predictions and indices
df_preds = pd.DataFrame(preds, columns=neighbor_book_index, index=neighbor_index)\
                .reset_index()

# Get target user's top predicted ratings
target_pred_books = df_preds[df_preds["index"] == target].columns[1:]
target_pred_ratings = df_preds[df_preds["index"] == target].values[0][1:]

# Put into df with relevant info from df_books
top_preds = pd.DataFrame({"book_index":target_pred_books, "predicted_rating":target_pred_ratings})\
                .sort_values(by="predicted_rating", ascending=False)\
                .merge(book_index.reset_index(), left_on="book_index", right_on="index")\
                .merge(
                    df_books[["book_id", "title", "avg_rating", "ratings_count", "year", "main_genre","url"]],
                    on="book_id"
                )\
                .drop(columns=["index"])

# View preds on actually read books
target_books_preds = top_preds[top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]\
                        .drop(["book_index"], axis=1)
target_books_preds = pd.merge(target_books_preds, target_user_ratings[["book_id", "user_rating"]], on="book_id")
target_books_preds["diff"] = target_books_preds["predicted_rating"] - target_books_preds["user_rating"]
print("Predictions on Actually Read Books")
display(target_books_preds[["title", "predicted_rating", "user_rating", "diff"]])

# Filter out already read books to get top preds
print("___________\n")
print("Predictions on Unread Books")
top_preds = top_preds[~top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]
display(top_preds[["title","avg_rating","predicted_rating","ratings_count","year","url"]]\
        .query("avg_rating > 3.9").head(20)
    )

Predictions on Actually Read Books


Unnamed: 0,title,predicted_rating,user_rating,diff
0,1984,4.971563,5.0,-0.028437
1,Matterhorn,4.971066,5.0,-0.028934
2,"Why We Love Dogs, Eat Pigs, and Wear Cows: An ...",4.970877,5.0,-0.029123
3,"The Devil's Chessboard: Allen Dulles, the CIA,...",4.970725,5.0,-0.029275
4,Miles: The Autobiography,4.970642,5.0,-0.029358
...,...,...,...,...
90,The Second Amendment,1.989107,2.0,-0.010893
91,The Doors of Perception,1.988802,2.0,-0.011198
92,"All Systems Red (The Murderbot Diaries, #1)",1.988441,2.0,-0.011559
93,"Casino Royale (James Bond, #1)",0.996899,1.0,-0.003101


___________

Predictions on Unread Books


Unnamed: 0,title,avg_rating,predicted_rating,ratings_count,year,url
0,The Philosophers Toolkit: A Compendium of Phil...,3.91,6.316999,394.0,2002,https://www.goodreads.com/book/show/192414.The...
2,The Philosophy Book,4.16,6.099649,1485.0,2011,https://www.goodreads.com/book/show/8493026-th...
3,"A Call to Action: Women, Religion, Violence, a...",3.99,5.838696,1331.0,2014,https://www.goodreads.com/book/show/20523863-a...
28,Music And Freedom,3.99,4.908676,503.0,2016,https://www.goodreads.com/book/show/30471827-m...
29,Receiver of Many (Hades & Persephone #1),4.1,4.858164,588.0,2015,https://www.goodreads.com/book/show/25984485-r...
30,"The Outer Limits of Reason: What Science, Math...",3.99,4.841499,342.0,2013,https://www.goodreads.com/book/show/17841838-t...
32,Marley and Me: Life and Love With the World's ...,4.12,4.747356,373596.0,2005,https://www.goodreads.com/book/show/12691.Marl...
33,The Great Bazaar & Brayan's Gold,4.05,4.682403,392.0,2015,https://www.goodreads.com/book/show/23013884-t...
36,A Little Life,4.28,4.661203,762.0,2016,https://www.goodreads.com/book/show/29408433-a...
37,Asterios Polyp,4.19,4.649906,20053.0,2009,https://www.goodreads.com/book/show/4070095-as...


In [30]:
# Highest rated books by each of the top genres
show_top_recs_by_genre(
    target_user_ratings, 
    large_neighborhood_ratings, 
    preds=top_preds, 
    how="MF", 
    whose_genres="target", 
    n_genres=10, 
    min_score=4.15, 
    min_ratings=400, 
    n_recs=20,
    write_to_excel=True
)

Genre 5: ['history', 'war', 'political', 'account', 'world', 'year', 'country', 'family', 'century', 'people']


Unnamed: 0,title,avg_rating,ratings_count,url
47,Conversation in the Cathedral,4.32,3331.0,https://www.goodreads.com/book/show/53970.Conv...
54,Bosnian Chronicle,4.21,619.0,https://www.goodreads.com/book/show/850223.Bos...
55,Black Earth: The Holocaust as History and Warning,4.21,1133.0,https://www.goodreads.com/book/show/23995415-b...
62,"American Tabloid (Underworld USA, #1)",4.21,10342.0,https://www.goodreads.com/book/show/36064.Amer...
73,Diplomacy,4.18,4584.0,https://www.goodreads.com/book/show/781183.Dip...
97,"Indonesia, Etc.: Exploring the Improbable Nation",4.18,470.0,https://www.goodreads.com/book/show/18377963-i...
176,Give Us the Ballot: The Modern Struggle for Vo...,4.42,720.0,https://www.goodreads.com/book/show/22929518-g...
269,Escape from Freedom,4.23,5910.0,https://www.goodreads.com/book/show/25491.Esca...
275,Country Driving: A Journey Through China from ...,4.19,4960.0,https://www.goodreads.com/book/show/6945572-co...
280,Stamped from the Beginning: The Definitive His...,4.54,996.0,https://www.goodreads.com/book/show/25898216-s...


_____

Genre 15: ['history', 'poem', 'essay', 'text', 'century', 'collection', 'world', 'writing', 'introduction', 'literature']


Unnamed: 0,title,avg_rating,ratings_count,url
2,The Philosophy Book,4.16,1485.0,https://www.goodreads.com/book/show/8493026-th...
50,The Complete Stories and Poems,4.37,161550.0,https://www.goodreads.com/book/show/23919.The_...
60,Felicity,4.27,2390.0,https://www.goodreads.com/book/show/24611522-f...
61,God Is Disappointed in You,4.16,1144.0,https://www.goodreads.com/book/show/17689005-g...
95,"Complete Poems, 1904-1962",4.35,18787.0,https://www.goodreads.com/book/show/26596.Comp...
108,Natasha's Dance: A Cultural History of Russia,4.16,2039.0,https://www.goodreads.com/book/show/97401.Nata...
123,The Essential Kierkegaard,4.16,1573.0,https://www.goodreads.com/book/show/24964.The_...
264,"Short Stories from Hogwarts of Heroism, Hardsh...",4.23,16961.0,https://www.goodreads.com/book/show/31538635-s...
265,Martin's Big Words: The Life of Dr. Martin Lut...,4.42,8859.0,https://www.goodreads.com/book/show/160943.Mar...
302,Crush,4.33,12173.0,https://www.goodreads.com/book/show/96259.Crush


_____

Genre 17: ['alien', 'planet', 'world', 'human', 'ship', 'space', 'war', 'crew', 'mission', 'race']


Unnamed: 0,title,avg_rating,ratings_count,url
114,This Thing of Darkness,4.48,1134.0,https://www.goodreads.com/book/show/142050.Thi...
182,"Rosie Revere, Engineer",4.54,4789.0,https://www.goodreads.com/book/show/17290220-r...
213,Miracle in the Andes,4.22,11354.0,https://www.goodreads.com/book/show/454236.Mir...
236,The Peregrine,4.18,1040.0,https://www.goodreads.com/book/show/1071726.Th...
319,The Ultimate Hitchhiker's Guide to the Galaxy,4.38,225626.0,https://www.goodreads.com/book/show/13.The_Ult...
406,"The Walking Dead, Compendium 1",4.43,58434.0,https://www.goodreads.com/book/show/6465707-th...
428,The Enigma of Amigara Fault,4.23,1855.0,https://www.goodreads.com/book/show/18129124-t...
460,"Spellbound (Grimnoir Chronicles, #2)",4.29,7184.0,https://www.goodreads.com/book/show/10822283-s...
478,"Planetary, Volume 4: Spacetime Archaeology",4.46,4114.0,https://www.goodreads.com/book/show/5210326-pl...
594,"Wolf by Wolf (Wolf by Wolf, #1)",4.28,9710.0,https://www.goodreads.com/book/show/24807186-w...


_____

Genre 8: ['guide', 'people', 'way', 'help', 'practical', 'world', 'experience', 'offer', 'question', 'business']


Unnamed: 0,title,avg_rating,ratings_count,url
64,Loving God,4.31,1342.0,https://www.goodreads.com/book/show/903912.Lov...
66,Reality Is Not What It Seems: The Journey to Q...,4.26,710.0,https://www.goodreads.com/book/show/30201328-r...
109,"The Story of the Human Body: Evolution, Health...",4.21,2759.0,https://www.goodreads.com/book/show/17736859-t...
126,See You at the Top,4.25,9153.0,https://www.goodreads.com/book/show/161415.See...
134,Make It Stick: The Science of Successful Learning,4.19,3855.0,https://www.goodreads.com/book/show/18770267-m...
180,Myths to Live By,4.25,3931.0,https://www.goodreads.com/book/show/821380.Myt...
187,Every Body Yoga: Let Go of Fear. Get On the Ma...,4.34,619.0,https://www.goodreads.com/book/show/30754069-e...
221,NeuroTribes: The Legacy of Autism and the Futu...,4.29,3998.0,https://www.goodreads.com/book/show/22514020-n...
272,Ready to Run: Unlocking Your Potential to Run ...,4.29,772.0,https://www.goodreads.com/book/show/18668429-r...
353,"No Death, No Fear",4.3,2017.0,https://www.goodreads.com/book/show/187636.No_...


_____

Genre 11: ['fiction', 'collection', 'world', 'literary', 'young', 'funny', 'short', 'comedy', 'horror', 'debut']


Unnamed: 0,title,avg_rating,ratings_count,url
37,Asterios Polyp,4.19,20053.0,https://www.goodreads.com/book/show/4070095-as...
431,Digging Up Mother: A Love Story,4.3,882.0,https://www.goodreads.com/book/show/28589336-d...
469,What Matters Most is How Well You Walk Through...,4.3,4351.0,https://www.goodreads.com/book/show/632901.Wha...
532,The Refrigerator Monologues,4.21,894.0,https://www.goodreads.com/book/show/32714267-t...
608,Tales,4.37,2100.0,https://www.goodreads.com/book/show/36313.Tales
892,The Essential Dykes to Watch Out For,4.36,6979.0,https://www.goodreads.com/book/show/3189884-th...
1098,Animal Farm / 1984,4.26,120269.0,https://www.goodreads.com/book/show/5472.Anima...
1456,Arrival,4.27,447.0,https://www.goodreads.com/book/show/31625351-a...
1773,Absolute Pandemonium: My Louder Than Life Story,4.2,604.0,https://www.goodreads.com/book/show/26036990-a...
1878,Before Sunrise & Before Sunset: Two Screenplays,4.35,1204.0,https://www.goodreads.com/book/show/379119.Bef...


_____

Genre 22: ['world', 'power', 'enemy', 'war', 'battle', 'evil', 'magic', 'save', 'ancient', 'warrior']


Unnamed: 0,title,avg_rating,ratings_count,url
320,When Christ and His Saints Slept (Henry II an...,4.27,10892.0,https://www.goodreads.com/book/show/43841.When...
385,"Red Sister (Book of the Ancestor, #1)",4.32,5784.0,https://www.goodreads.com/book/show/25895524-r...
505,"Contractor (The Contractors, #1)",4.3,1072.0,https://www.goodreads.com/book/show/22999740-c...
515,"The Broken God (A Requiem for Homo Sapiens, #1)",4.24,528.0,https://www.goodreads.com/book/show/2031004.Th...
541,"A Crown of Wishes (The Star-Touched Queen, #2)",4.2,2957.0,https://www.goodreads.com/book/show/29939047-a...
558,"Sins of Empire (Gods of Blood and Powder, #1)",4.48,2567.0,https://www.goodreads.com/book/show/28811016-s...
707,"Asterix the Gaul (Asterix, #1)",4.16,17151.0,https://www.goodreads.com/book/show/71292.Aste...
716,"Free the Darkness (King's Dark Tidings, #1)",4.3,6591.0,https://www.goodreads.com/book/show/28385685-f...
731,"The Last Unicorn (The Last Unicorn, #1)",4.18,77163.0,https://www.goodreads.com/book/show/29127.The_...
757,"Strange the Dreamer (Strange the Dreamer, #1)",4.39,1375.0,https://www.goodreads.com/book/show/29748925-s...


_____

Genre 3: ['collection', 'comic', 'short', 'fan', 'feature', 'bestselling', 'adventure', 'available', 'classic', 'includes']


Unnamed: 0,title,avg_rating,ratings_count,url
206,"Hawkeye, Volume 2: Little Hits",4.26,16304.0,https://www.goodreads.com/book/show/17277800-h...
210,The Private Eye,4.24,1847.0,https://www.goodreads.com/book/show/26247784-t...
298,The Clown of God,4.21,2353.0,https://www.goodreads.com/book/show/69086.The_...
401,The Dark Tower,4.27,96576.0,https://www.goodreads.com/book/show/5091.The_D...
516,Saga: Book Two,4.68,866.0,https://www.goodreads.com/book/show/32926680-saga
536,Saga: Book One,4.7,4201.0,https://www.goodreads.com/book/show/22078240-saga
680,The Complete Clive Barker's The Great And Secr...,4.21,15868.0,https://www.goodreads.com/book/show/34871.The_...
702,An Imperial Affliction,4.23,987.0,https://www.goodreads.com/book/show/23959944-a...
724,"Ms. Marvel, #1: Meta Morphosis",4.32,4702.0,https://www.goodreads.com/book/show/18757876-m...
834,The Last Answer,4.17,1150.0,https://www.goodreads.com/book/show/15755066-t...


_____

Genre 23: ['agent', 'team', 'mission', 'job', 'dead', 'terrorist', 'zombie', 'government', 'secret', 'drug']


Unnamed: 0,title,avg_rating,ratings_count,url
1684,Harrison Bergeron,4.23,10690.0,https://www.goodreads.com/book/show/10176119-h...
1705,The Plot to Hack America: How Putin’s Cyberspi...,4.17,444.0,https://www.goodreads.com/book/show/32202585-t...
1881,"Dust & Decay (Rot & Ruin, #2)",4.28,13867.0,https://www.goodreads.com/book/show/9917998-du...
2940,Never Split the Difference: Negotiating As If ...,4.49,3314.0,https://www.goodreads.com/book/show/26156469-n...
3112,Killfile (John Smith #1),4.16,779.0,https://www.goodreads.com/book/show/27213216-k...
3481,"Hawkeye, Volume 4: Rio Bravo",4.31,5523.0,https://www.goodreads.com/book/show/22237290-h...
3711,The Asset: Act II (Isabella Rose #2),4.2,1593.0,https://www.goodreads.com/book/show/29440957-t...
3971,"The First Hostage (J. B. Collins, #2)",4.43,1619.0,https://www.goodreads.com/book/show/26043842-t...
4761,The Girl Who Played with Fire (Millennium #2),4.22,7361.0,https://www.goodreads.com/book/show/6087991-th...
5392,"Much Obliged, Jeeves",4.24,472.0,https://www.goodreads.com/book/show/16394.Much...


_____

Genre 20: ['family', 'year', 'child', 'young', 'home', 'past', 'world', 'loss', 'secret', 'friend']


Unnamed: 0,title,avg_rating,ratings_count,url
36,A Little Life,4.28,762.0,https://www.goodreads.com/book/show/29408433-a...
101,"Carry On, Warrior: The Power of Embracing Your...",4.18,2041.0,https://www.goodreads.com/book/show/17138313-c...
288,Rising Strong,4.19,23667.0,https://www.goodreads.com/book/show/23317538-r...
398,After Auschwitz,4.29,467.0,https://www.goodreads.com/book/show/17828703-a...
448,The Center Cannot Hold: My Journey Through Mad...,4.23,500.0,https://www.goodreads.com/book/show/2181710.Th...
468,My Journey : Transforming Dreams into Actions,4.35,1743.0,https://www.goodreads.com/book/show/18371021-m...
485,I Liked My Life,4.21,3450.0,https://www.goodreads.com/book/show/29875926-i...
546,Dear Evan Hansen,4.7,563.0,https://www.goodreads.com/book/show/34381137-d...
560,Rare Bird: A Memoir of Loss and Love,4.4,1043.0,https://www.goodreads.com/book/show/21432315-r...
570,Lament for a Son,4.42,974.0,https://www.goodreads.com/book/show/148208.Lam...


_____

Genre 7: ['fairy', 'magic', 'adventure', 'world', 'magical', 'die', 'fantasy', 'journey', 'young', 'land']


Unnamed: 0,title,avg_rating,ratings_count,url
93,Interstellar Cinderella,4.22,2024.0,https://www.goodreads.com/book/show/22875394-i...
350,The Enemy: A Book About Peace,4.24,427.0,https://www.goodreads.com/book/show/4524994-th...
415,Grounded: The Adventures of Rapunzel (Tyme #1),4.16,1438.0,https://www.goodreads.com/book/show/23399287-g...
441,The Language of Thorns: Midnight Tales and Dan...,4.53,4200.0,https://www.goodreads.com/book/show/34076952-t...
491,The First Rumpole Omnibus,4.3,1278.0,https://www.goodreads.com/book/show/54383.The_...
685,A Boy Called Christmas,4.25,1471.0,https://www.goodreads.com/book/show/25882558-a...
758,"The Pillars of the Earth (Kingsbridge, #1)",4.29,471990.0,https://www.goodreads.com/book/show/5043.The_P...
980,Modoc: The True Story of the Greatest Elephant...,4.2,7189.0,https://www.goodreads.com/book/show/23982.Modoc
1122,We Found a Hat,4.21,1003.0,https://www.goodreads.com/book/show/28586748-w...
1149,Jaya: An Illustrated Retelling of the Mahabharata,4.2,9857.0,https://www.goodreads.com/book/show/9864913-jaya


_____

