In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import sparse
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import Normalizer
from IPython.display import display
from scipy.sparse.linalg import svds

### 0.1 - Load and Prepare Data

In [2]:
# Load df_books and genres
wd = os.getcwd()
df_books = pd.read_csv(wd + "/data/goodreads_books.csv")
df_inferred_genres = pd.read_csv(wd + "/data/inferred_genres.csv")
genre_descriptors = pd.read_csv(wd + "/data/inferred_genre_top_words.csv")

# Join genres to books
df_books = pd.merge(df_books, df_inferred_genres.drop(columns="title"), how="left", on="book_id")

In [3]:
# Load sparse_reviews from file
df_reviews = sparse.load_npz(wd + "/data/user_reviews.npz")

# Load user (rows) and book (cols) indices
user_index = pd.read_csv(wd + "/data/user_index_for_sparse_matrix.csv").rename(columns={"0":"user_id"})
book_index = pd.read_csv(wd + "/data/book_index_for_sparse_matrix.csv").rename(columns={"0":"book_id"})

In [4]:
# Load my goodreads history
my_books = pd.read_csv(wd + "/data/goodreads_library_export.csv")

# Filter to books I've rated
my_books = my_books[["Book Id", "My Rating", "Title"]]\
    .rename(columns={"Book Id":"book_id", "My Rating":"rating", "Title":"title"}).query("rating > 0")
print(f"Target reader ratings: {my_books.shape[0]}")

# Reformat to fit sparse matrix
my_books_1 = pd.merge(
                my_books, df_books[["book_id", "title"]], 
                how="left", on="book_id", suffixes=["_mb","_dfb"]
            )
my_books_id_match = my_books_1[~my_books_1["title_dfb"].isna()]
print(f"Matching on book_id: {my_books_id_match.shape[0]}")

my_books_2 = pd.merge(
    my_books_1[my_books_1["title_dfb"].isna()], df_books[["book_id", "title"]], 
    how="left", left_on="title_mb", right_on="title", suffixes=["_mb","_dfb"]
    )
my_books_match_on_title = my_books_2[~my_books_2["title"].isna()]
print(f"Matching on title: {my_books_match_on_title.shape[0]}")

my_books_id_match = my_books_id_match[["book_id", "rating", "title_dfb"]]\
                        .rename(columns={"title_dfb":"title"})
my_books_match_on_title = my_books_match_on_title[["book_id_dfb", "rating", "title"]]\
                        .rename(columns={"book_id_dfb":"book_id"})

my_books_3 = pd.concat([my_books_id_match, my_books_match_on_title])
my_books_3 = my_books_3.drop_duplicates(subset="book_id")
print(f"Total matches: {my_books_3.shape[0]}, {(my_books_3.shape[0] / my_books.shape[0]):.2%} of total")

my_books_4 = pd.merge(book_index, my_books_3, how="left", on="book_id")#.fillna(0.)
my_books = np.array(my_books_4["rating"]).reshape(1,-1)

# Add to sparse matrix
df_reviews = sparse.vstack([df_reviews, my_books])

# # Normalize reviews within users
# norm = Normalizer()
# df_reviews = norm.fit_transform(df_reviews) 

Target reader ratings: 166
Matching on book_id: 30
Matching on title: 65
Total matches: 95, 57.23% of total


In [5]:
# Set target reader to make recommendations for
target = df_reviews.shape[0] - 1

#### 0.2 - Functions to Process Recommendations

In [6]:
# Function to use KNN to find similar users
def find_neighbors(n_neighbors, target_user, ratings_df, ind_books, books):

    # Instantiate KNN
    n_neighbors = n_neighbors

    nn_model = NearestNeighbors(
        metric="cosine",
        algorithm="auto",
        n_neighbors=n_neighbors,
        n_jobs=-1
    )

    # Fit to sparse matrix
    nn_model.fit(ratings_df)

    # Feed in user and get neighbors and distances
    reader = ratings_df[target,:].toarray()
    dists, neighbors = nn_model.kneighbors(reader, return_distance=True)

    similar_users = pd.DataFrame(
        [pd.Series(neighbors.reshape(-1)), pd.Series(dists.reshape(-1))]).T.rename(
            columns={0:"user", 1:"distance"}
    )

    # Get all books read by similar users
    book_ind = []
    book_rat = []
    uid = []
    target_user_books = []
    target_user_book_rat = []
    for nt in similar_users.itertuples():
        user = ratings_df[int(nt.user),:].toarray()
        book_inds = np.where(user[0] > 0)[0]
        ratings = user[0][np.where(user[0] > 0)[0]]
        for i in range(len(book_inds)):        
            book_ind.append(book_inds[i])
            book_rat.append(ratings[i])
            uid.append(nt.user)    
            if nt.distance < 0.000000001:
                target_user_books.append(book_inds[i])
                target_user_book_rat.append(ratings[i])

    neighbor_user_ratings = pd.DataFrame([uid, book_ind, book_rat]).T.rename(
                                columns={0:"uid",1:"book_index",2:"user_rating"}
                            )

    # Join overall rating for each book
    neighbor_user_ratings = pd.merge(
                                ind_books.reset_index(), neighbor_user_ratings, 
                                how="inner", left_on="index", right_on="book_index"
                            )
    neighbor_user_ratings = pd.merge(neighbor_user_ratings, books, how="inner", on="book_id")

    # Filter out books target reader has already read
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["book_index"].isin(target_user_books)]
    neighbor_user_ratings.drop(["index"], axis=1, inplace=True)

    # Filter out later volumes in series using regex pattern
    regex1 = r"#([3-9]|[1-9]\d+)"
    regex2 = r"Vol. ([0-9]|[1-9]\d+)"
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains("#1-")]

    # View target user's ratings
    target_user_ratings = pd.DataFrame(
                            [target_user_books, target_user_book_rat]).T.rename(
                                columns={0:"book_index",1:"user_rating"}
                        )
    target_user_ratings = pd.merge(
                            ind_books.reset_index(), target_user_ratings,
                            how="inner", left_on="index", right_on="book_index"
                        )
    target_user_ratings = pd.merge(target_user_ratings, books, how="inner", on="book_id")    

    return neighbor_user_ratings, target_user_ratings

In [7]:
# Function to show top recs for each genre
def show_top_recs_by_genre(
        target, others, preds=None, how=["KNN", "MF"], whose_genres=["target", "neighbors"], 
        n_genres=5, min_score=3.9, min_ratings=400, n_recs=15, write_to_excel=False):

    if whose_genres == "target":
        genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    else:
        genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))    
    
    genres = []
    results = []
    # Loop through genres in descending relevance order and print top recs
    for genre in genre_ranking.index[0:n_genres]:        
        
        g = float(genre[6:])
        if how == "KNN":
            highest_rated_recs_genre = others.query("main_genre == @g")\
                .groupby(["title", "avg_rating", "ratings_count", "year", "url"])["book_id"]\
                .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)
        
        elif how == "MF":
            highest_rated_recs_genre = preds.query("main_genre == @g")\
            .sort_values(by="predicted_rating", ascending=False)                

        highest_rated_recs_genre = highest_rated_recs_genre.query(
                                        "ratings_count > @min_ratings & avg_rating > @min_score"
                                    )

        # Print genre name and descriptor
        for nt in genre_descriptors.itertuples():
            genre_rep = genre.replace("_"," ")
            if nt.genre_string[0:len(f"{genre}:")] == f"{genre_rep}:":
                gs = (nt.genre_string)
        
        genres.append(gs)
        results.append(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(50))

        print(gs)
        display(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(n_recs))
        print("_____\n")
    
    # Write to excel if keyword set
    path = (wd + "/data/book_recs.xlsx")
    if write_to_excel == True:
        if os.path.isfile(path):
            with pd.ExcelWriter(path, engine="openpyxl", mode="a", if_sheet_exists='replace') as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        else:
            with pd.ExcelWriter(path) as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        writer.close

In [8]:
# Functions to show top rated and most popular among similar readers
def neighbors_most_popular(others, n=10, min_ratings=1000, min_score=3.8):
    
    popular_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["book_id", "avg_rating"], ascending=False)\
        .nlargest(n, "book_id").rename(columns={"book_id":"percent_similar_users_read"})

    popular_recs["percent_similar_users_read"] = (popular_recs["percent_similar_users_read"] / 
                                                    others["uid"].nunique()).map('{:.1%}'.format)
    return popular_recs[["title","avg_rating","ratings_count","year","percent_similar_users_read","url"]]


def neighbors_top_rated(others, n=10, min_ratings=1000, min_score=3.8):

    highest_rated_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)\
        .nlargest(n, "avg_rating").drop(columns="book_id")

    return highest_rated_recs

In [9]:
# Function to plot neighbors' and target's top genres
def plot_top_genres(others, target):
    
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12,10))
    neighbor_genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    target_genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    
    sns.barplot(
        data=neighbor_genre_ranking, y=neighbor_genre_ranking.index, x=0, ax=ax[0]
    )
    ax[0].set_xlabel("Genre preference")
    ax[0].set_title("Neighbors' Top Genres")

    # Plot target reader's top genres    
    sns.barplot(
        data=target_genre_ranking, y=target_genre_ranking.index, x=0, ax=ax[1]
    )
    ax[1].set_xlabel("Genre preference")
    ax[1].set_title("Target User's Top Genres")
    
    plt.show()

## TESTING using gradient descent for matrix factorization 

In [18]:
# Find larger number of similar users than before to get broad pool of potentially relevant books
df_reviews.data[np.isnan(df_reviews.data)] = 0
df_reviews = sparse.csr_matrix(df_reviews)
large_neighborhood_ratings, target_user_ratings = find_neighbors(1000, target, df_reviews, book_index, df_books)

# Get unique users and books to slice df_reviews
neighbor_index = large_neighborhood_ratings["uid"].unique()
neighbor_index = np.append(neighbor_index, target)
neighbor_book_index = large_neighborhood_ratings["book_index"].unique()
neighbor_book_index = np.append(neighbor_book_index, target_user_ratings["book_index"].unique())

# Slice df_reviews to make User Ratings Matrix
df_reviews.data[df_reviews.data == 0] = np.nan
df_reviews = sparse.csr_matrix(df_reviews)
R = df_reviews[:, neighbor_book_index]
R = R[neighbor_index, :].toarray()

  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]


In [19]:
# def matrix_factorization1(R, k=5, steps=4000, lr=0.0002, beta=0.02, verbose=0):
#     """
#     Uses gradient descent to train matrix factorization algorithm. Useful explanation
#     of the math here: 
#     http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

#     Args:
#         R: Utility matrix of user item ratings         
#         k: Number of latent features
#         steps: Number of epochs to run training
#         lr: Learning rate
#         beta: Regularization strength
#         verbose: Prints updates if 1

#     Returns:
#         Predictions, User Embeddings, Item Embdeddings
#     """
#     # Initialize random values for user (P) and book (Q) embedding matrices
#     P = np.random.rand(R.shape[0],K)
#     Q = np.random.rand(R.shape[1],K).T
    
#     for step in range(steps): # For each steps
#         r_0 = np.where(R > 0) # Find indices where rating > 0
#         e = 0 # Initialize error for step to 0
#         for n in range(len(r_0[0])): # For each value where rating > 0
#             i = r_0[0][n] # Get the row index where the value is
#             j = r_0[1][n] # Get the column index where the value is
#             resid = R[i][j] - np.dot(P[i,:],Q[:,j]) # residual is value - r_hat
#             e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2) # Add to error
#             for k in range(K): # for each component (latent feature)       
#                 P[i][k] = P[i][k] + lr * (2 * resid * Q[k][j] - beta * P[i][k]) # Update P by step in gradient
#                 Q[k][j] = Q[k][j] + lr * (2 * resid * P[i][k] - beta * Q[k][j]) # Update Q by step in gradient
#                 e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2)) # Update error with regularized term

#         # View progress
#         if verbose == 1:
#             if step % (steps / 10) == 0:
#                 print(f"Step: {step}, error: {e}")
        
#         # Break loop if error is very small
#         if e < 0.001:
#             break
    
#     # Get predictions
#     preds = np.dot(P, Q)

#     return preds, P, Q.T

In [35]:
# Function to train embedding matrices using alternating least squares (ALS) and make predictions 
def matrix_factorization(R, k=5, steps=4000, lr=0.0002, beta=0.02, verbose=0):
    """
    Uses gradient descent to train matrix factorization algorithm. Useful explanation
    of the math here: 
    http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

    Args:
        R: Utility matrix of user item ratings         
        k: Number of latent features
        steps: Number of epochs to run training
        lr: Learning rate
        beta: Regularization strength
        verbose: Prints updates if 1

    Returns:
        Predictions, User Embeddings, Item Embdeddings
    """

    # Initialize random values for user (P) and book (Q) embedding matrices
    K = k
    P = np.random.rand(R.shape[0],k)
    Q = np.random.rand(R.shape[1],k).T

    for step in range(steps): # For each steps
        r_0 = np.where(R > 0) # Find indices where rating > 0
        e = 0 # Initialize error for step to 0
        i = r_0[0]
        j = r_0[1]
        resid = R[i,j] - (P @ Q)[i,j] # residual is value - r_hat       
        for k in range(K): # for each component (latent feature)                
            P[i,k] = P[i,k] + lr * (2 * resid * Q[k,j] - beta * P[i,k]) # Update P by step in gradient                             
            Q[k,j] = Q[k,j] + lr * (2 * resid * P[i,k] - beta * Q[k,j]) # Update Q by step in gradient            

        # Calculate error every 1k steps
        e = 0 # Initialize counter for error            
        if (step + 1) % 1000 == 0: # Only do every 1k steps
            e = e + np.sum(pow(resid, 2)) # Add to error
            for k in range(K): # for each component (latent feature)                             
                e = e + (beta/2) * np.sum(pow(P[i,k],2) + pow(Q[k,j],2)) # Update error with regularized term

            # View progress
            if verbose == 1:
                print(f"Step: {step + 1}, error: {e}") # Print update                           

        # # Break loop if error is very small
        # if (step > 1500) & (e < 0.001):
        #     break
    
    # Get predictions
    preds = np.dot(P, Q)

    return preds, P, Q.T

In [56]:
preds, user_embeddings, item_embeddings = matrix_factorization(R, k=30, steps=3000, verbose=1)

In [57]:
# Make dataframe from predictions and indices
df_preds = pd.DataFrame(preds, columns=neighbor_book_index, index=neighbor_index)\
                .reset_index()

# Get target user's top predicted ratings
target_pred_books = df_preds[df_preds["index"] == target].columns[1:]
target_pred_ratings = df_preds[df_preds["index"] == target].values[0][1:]

# Put into df with relevant info from df_books
top_preds = pd.DataFrame({"book_index":target_pred_books, "predicted_rating":target_pred_ratings})\
                .sort_values(by="predicted_rating", ascending=False)\
                .merge(book_index.reset_index(), left_on="book_index", right_on="index")\
                .merge(
                    df_books[["book_id", "title", "avg_rating", "ratings_count", "year", "main_genre","url"]],
                    on="book_id"
                )\
                .drop(columns=["index"])

# View preds on actually read books
target_books_preds = top_preds[top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]\
                        .drop(["book_index"], axis=1)
target_books_preds = pd.merge(target_books_preds, target_user_ratings[["book_id", "user_rating"]], on="book_id")
target_books_preds["diff"] = target_books_preds["predicted_rating"] - target_books_preds["user_rating"]
print("Predictions on Actually Read Books")
display(target_books_preds[["title", "predicted_rating", "user_rating", "diff"]])

# Filter out already read books to get top preds
print("___________\n")
print("Predictions on Unread Books")
top_preds = top_preds[~top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]
display(top_preds[["title","avg_rating","predicted_rating","ratings_count","year","url"]]\
        .query("avg_rating > 3.9").head(20)
    )

Predictions on Actually Read Books


Unnamed: 0,title,predicted_rating,user_rating,diff
0,"The Dark Forest (Remembrance of Earth’s Past, #2)",4.708837,5.0,-0.291163
1,1984,4.707979,5.0,-0.292021
2,The Ones Who Walk Away from Omelas,4.699686,5.0,-0.300314
3,Steve Jobs,4.687123,5.0,-0.312877
4,The Three-Body Problem (Remembrance of Earth’s...,4.683958,5.0,-0.316042
...,...,...,...,...
90,Seven Brief Lessons on Physics,1.890757,2.0,-0.109243
91,The One Thing: The Surprisingly Simple Truth B...,1.889901,2.0,-0.110099
92,Steal Like an Artist: 10 Things Nobody Told Yo...,1.856815,2.0,-0.143185
93,Hillbilly Elegy: A Memoir of a Family and Cult...,1.058190,1.0,0.058190


___________

Predictions on Unread Books


Unnamed: 0,title,avg_rating,predicted_rating,ratings_count,year,url
0,The Farming of Bones,4.06,5.17144,5843.0,1999,https://www.goodreads.com/book/show/31115.The_...
1,When Food Is Love: Exploring the Relationship ...,4.0,4.867436,1303.0,1991,https://www.goodreads.com/book/show/39175.When...
16,"The Outer Limits of Reason: What Science, Math...",3.99,4.638082,342.0,2013,https://www.goodreads.com/book/show/17841838-t...
21,The Power of Vulnerability: Teachings of Authe...,4.57,4.623207,3778.0,2013,https://www.goodreads.com/book/show/23500254-t...
29,The Secret of Secrets,4.43,4.57073,323.0,1997,https://www.goodreads.com/book/show/530479.The...
30,Scheisshaus Luck: Surviving the Unspeakable in...,4.09,4.541463,1049.0,2008,https://www.goodreads.com/book/show/3930572-sc...
31,Every Body Yoga: Let Go of Fear. Get On the Ma...,4.34,4.520398,619.0,2017,https://www.goodreads.com/book/show/30754069-e...
32,A General Theory of Oblivion,3.91,4.513305,862.0,2015,https://www.goodreads.com/book/show/23346410-a...
33,This is the Story of a Happy Marriage,4.03,4.486513,13538.0,2013,https://www.goodreads.com/book/show/17349222-t...
40,"My Kind of Wonderful (Cedar Ridge, #2)",4.09,4.39461,3181.0,2015,https://www.goodreads.com/book/show/25317360-m...


In [58]:
# Highest rated books by each of the top genres
show_top_recs_by_genre(
    target_user_ratings, 
    large_neighborhood_ratings, 
    preds=top_preds, 
    how="MF", 
    whose_genres="target", 
    n_genres=10, 
    min_score=4.15, 
    min_ratings=400, 
    n_recs=20,
    write_to_excel=True
)

Genre 5: ['history', 'war', 'political', 'account', 'world', 'year', 'country', 'family', 'century', 'people']


Unnamed: 0,title,avg_rating,ratings_count,url
51,"Indonesia, Etc.: Exploring the Improbable Nation",4.18,470.0,https://www.goodreads.com/book/show/18377963-i...
64,Destiny Disrupted: A History of the World Thro...,4.34,416.0,https://www.goodreads.com/book/show/6715481-de...
129,Conversation in the Cathedral,4.32,3331.0,https://www.goodreads.com/book/show/53970.Conv...
223,My Promised Land: The Triumph and Tragedy of I...,4.22,4226.0,https://www.goodreads.com/book/show/15798334-m...
260,The Girl with Seven Names: A North Korean Defe...,4.45,8293.0,https://www.goodreads.com/book/show/25362017-t...
272,"March: Book One (March, #1)",4.34,21092.0,https://www.goodreads.com/book/show/17346698-m...
279,Arch of Triumph: A Novel of a Man Without a Co...,4.4,11703.0,https://www.goodreads.com/book/show/672948.Arc...
322,Shoe Dog: A Memoir by the Creator of NIKE,4.44,27468.0,https://www.goodreads.com/book/show/27220736-s...
357,The New Jim Crow: Mass Incarceration in the Ag...,4.49,24694.0,https://www.goodreads.com/book/show/6792458-th...
379,The Silk Roads: A New History of the World,4.25,3022.0,https://www.goodreads.com/book/show/25812847-t...


_____

Genre 15: ['history', 'poem', 'essay', 'text', 'century', 'collection', 'world', 'writing', 'introduction', 'literature']


Unnamed: 0,title,avg_rating,ratings_count,url
46,Natasha's Dance: A Cultural History of Russia,4.16,2039.0,https://www.goodreads.com/book/show/97401.Nata...
49,The Roots of Romanticism,4.24,437.0,https://www.goodreads.com/book/show/84713.The_...
56,"Complete Poems, 1904-1962",4.35,18787.0,https://www.goodreads.com/book/show/26596.Comp...
59,Teaching My Mother How to Give Birth,4.38,7830.0,https://www.goodreads.com/book/show/13376363-t...
67,Tranny: Confessions of Punk Rock's Most Infamo...,4.23,2655.0,https://www.goodreads.com/book/show/29467305-t...
76,"The Camera (Ansel Adams Photography, #1)",4.22,2663.0,https://www.goodreads.com/book/show/20501.The_...
125,A Call to Conscience: The Landmark Speeches,4.53,502.0,https://www.goodreads.com/book/show/225073.A_C...
182,In a Grove,4.19,1012.0,https://www.goodreads.com/book/show/8132998-in...
187,We Should All Be Feminists,4.47,1351.0,https://www.goodreads.com/book/show/23301818-w...
197,The World of Ice & Fire: The Untold History of...,4.26,12952.0,https://www.goodreads.com/book/show/17345242-t...


_____

Genre 17: ['alien', 'planet', 'world', 'human', 'ship', 'space', 'war', 'crew', 'mission', 'race']


Unnamed: 0,title,avg_rating,ratings_count,url
148,The Abyss Beyond Dreams,4.21,4944.0,https://www.goodreads.com/book/show/20697413-t...
206,The Evolutionary Void,4.26,13729.0,https://www.goodreads.com/book/show/7438179-th...
286,"Wolf by Wolf (Wolf by Wolf, #1)",4.28,9710.0,https://www.goodreads.com/book/show/24807186-w...
586,"Kings of the Wyld (The Band, #1)",4.42,2285.0,https://www.goodreads.com/book/show/30841984-k...
621,The Winter Fortress: The Epic Mission to Sabot...,4.23,1057.0,https://www.goodreads.com/book/show/25897720-t...
688,The World Is Blue: How Our Fate and the Ocean'...,4.19,486.0,https://www.goodreads.com/book/show/6885052-th...
706,Foundation / Foundation and Empire / Second Fo...,4.35,3316.0,https://www.goodreads.com/book/show/278097.Fou...
733,"The Honor of the Queen (Honor Harrington, #2)",4.21,487.0,https://www.goodreads.com/book/show/261758.The...
822,"Caliban's War (The Expanse, #2)",4.29,41459.0,https://www.goodreads.com/book/show/12591698-c...
855,"Spellbound (Grimnoir Chronicles, #2)",4.29,7184.0,https://www.goodreads.com/book/show/10822283-s...


_____

Genre 8: ['guide', 'people', 'way', 'help', 'practical', 'world', 'experience', 'offer', 'question', 'business']


Unnamed: 0,title,avg_rating,ratings_count,url
21,The Power of Vulnerability: Teachings of Authe...,4.57,3778.0,https://www.goodreads.com/book/show/23500254-t...
31,Every Body Yoga: Let Go of Fear. Get On the Ma...,4.34,619.0,https://www.goodreads.com/book/show/30754069-e...
84,Loving God,4.31,1342.0,https://www.goodreads.com/book/show/903912.Lov...
107,Mountains Beyond Mountains: The Quest of Dr. P...,4.21,53936.0,https://www.goodreads.com/book/show/10235.Moun...
114,"The Story of the Human Body: Evolution, Health...",4.21,2759.0,https://www.goodreads.com/book/show/17736859-t...
220,The Hard Thing About Hard Things: Building a B...,4.2,20745.0,https://www.goodreads.com/book/show/18176747-t...
235,The Power of a Praying Woman,4.38,9867.0,https://www.goodreads.com/book/show/18054709-t...
284,The Pleasures of God: Meditations on God's Del...,4.33,4596.0,https://www.goodreads.com/book/show/45364.The_...
291,The (7L) The Seven Levels of Communication: Go...,4.33,482.0,https://www.goodreads.com/book/show/9858250-th...
301,Ready to Run: Unlocking Your Potential to Run ...,4.29,772.0,https://www.goodreads.com/book/show/18668429-r...


_____

Genre 11: ['fiction', 'collection', 'world', 'literary', 'young', 'funny', 'short', 'comedy', 'horror', 'debut']


Unnamed: 0,title,avg_rating,ratings_count,url
508,Incidences,4.32,658.0,https://www.goodreads.com/book/show/685172.Inc...
672,Asterios Polyp,4.19,20053.0,https://www.goodreads.com/book/show/4070095-as...
683,Peter and Alice,4.44,561.0,https://www.goodreads.com/book/show/17347724-p...
735,Absolute Pandemonium: My Louder Than Life Story,4.2,604.0,https://www.goodreads.com/book/show/26036990-a...
1127,The Essential Dykes to Watch Out For,4.36,6979.0,https://www.goodreads.com/book/show/3189884-th...
1224,Tales,4.37,2100.0,https://www.goodreads.com/book/show/36313.Tales
1761,A Supposedly Fun Thing I'll Never Do Again: E...,4.28,23746.0,https://www.goodreads.com/book/show/6748.A_Sup...
2232,The Faraway Nearby,4.19,3409.0,https://www.goodreads.com/book/show/16158561-t...
2634,Ministry of Moral Panic,4.24,557.0,https://www.goodreads.com/book/show/17238889-m...
2766,A Manual for Cleaning Women: Selected Stories,4.17,6863.0,https://www.goodreads.com/book/show/22929586-a...


_____

Genre 22: ['world', 'power', 'enemy', 'war', 'battle', 'evil', 'magic', 'save', 'ancient', 'warrior']


Unnamed: 0,title,avg_rating,ratings_count,url
66,Blackwing (Ravens' Mark #1),4.24,401.0,https://www.goodreads.com/book/show/33916060-b...
122,"Everwild (Skinjacker, #2)",4.21,9779.0,https://www.goodreads.com/book/show/6390465-ev...
124,"Legend (The Drenai Saga, #1)",4.25,18805.0,https://www.goodreads.com/book/show/618177.Legend
146,"Asterix the Gaul (Asterix, #1)",4.16,17151.0,https://www.goodreads.com/book/show/71292.Aste...
155,"Mara, Daughter of the Nile",4.23,7903.0,https://www.goodreads.com/book/show/406186.Mar...
190,"Avatar: The Last Airbender: The Search, Part 2...",4.35,4658.0,https://www.goodreads.com/book/show/17445144-a...
229,"The Assassin's Blade (Throne of Glass, #0.1-0.5)",4.48,2010.0,https://www.goodreads.com/book/show/18594430-t...
355,Villains by Necessity,4.19,1116.0,https://www.goodreads.com/book/show/401454.Vil...
448,"Scythe (Arc of a Scythe, #1)",4.3,13000.0,https://www.goodreads.com/book/show/28954189-s...
546,"Dawn of Wonder (The Wakening, #1)",4.35,13351.0,https://www.goodreads.com/book/show/25451852-d...


_____

Genre 3: ['collection', 'comic', 'short', 'fan', 'feature', 'bestselling', 'adventure', 'available', 'classic', 'includes']


Unnamed: 0,title,avg_rating,ratings_count,url
135,A Good Man is Hard to Find and Other Stories,4.24,26384.0,https://www.goodreads.com/book/show/48464.A_Go...
166,Y: The Last Man - The Deluxe Edition Book One,4.36,10549.0,https://www.goodreads.com/book/show/3710603-y
394,The Private Eye,4.24,1847.0,https://www.goodreads.com/book/show/26247784-t...
401,Groot,4.35,979.0,https://www.goodreads.com/book/show/26030887-g...
404,The Clown of God,4.21,2353.0,https://www.goodreads.com/book/show/69086.The_...
469,"Preacher, Book 4",4.27,1535.0,https://www.goodreads.com/book/show/9785203-pr...
531,"The Guild (The Guild, #1)",4.22,4861.0,https://www.goodreads.com/book/show/8693600-th...
553,"A History of the English Speaking Peoples, 4 Vols",4.31,767.0,https://www.goodreads.com/book/show/1644043.A_...
652,The Glory of Their Times: The Story of the Ear...,4.32,3025.0,https://www.goodreads.com/book/show/461749.The...
866,The Julian Chapter,4.32,11380.0,https://www.goodreads.com/book/show/20878809-t...


_____

Genre 23: ['agent', 'team', 'mission', 'job', 'dead', 'terrorist', 'zombie', 'government', 'secret', 'drug']


Unnamed: 0,title,avg_rating,ratings_count,url
80,Never Split the Difference: Negotiating As If ...,4.49,3314.0,https://www.goodreads.com/book/show/26156469-n...
522,Harrison Bergeron,4.23,10690.0,https://www.goodreads.com/book/show/10176119-h...
1824,The Match: The Day the Game of Golf Changed Fo...,4.32,1532.0,https://www.goodreads.com/book/show/571428.The...
2238,The Girl Who Played with Fire (Millennium #2),4.22,7361.0,https://www.goodreads.com/book/show/6087991-th...
2478,Shadows for Silence in the Forests of Hell,4.16,5683.0,https://www.goodreads.com/book/show/21411388-s...
2586,"The Cartel (The Cartel, #1)",4.58,5703.0,https://www.goodreads.com/book/show/5953477-th...
2997,"Hawkeye, Volume 4: Rio Bravo",4.31,5523.0,https://www.goodreads.com/book/show/22237290-h...
3201,"Much Obliged, Jeeves",4.24,472.0,https://www.goodreads.com/book/show/16394.Much...
3473,"The First Hostage (J. B. Collins, #2)",4.43,1619.0,https://www.goodreads.com/book/show/26043842-t...


_____

Genre 20: ['family', 'year', 'child', 'young', 'home', 'past', 'world', 'loss', 'secret', 'friend']


Unnamed: 0,title,avg_rating,ratings_count,url
85,A Little Life,4.28,762.0,https://www.goodreads.com/book/show/29408433-a...
105,"Far from the Tree: Parents, Children, and the ...",4.27,12131.0,https://www.goodreads.com/book/show/13547504-f...
160,Stars Between the Sun and Moon: One Woman's Li...,4.16,451.0,https://www.goodreads.com/book/show/25420166-s...
170,The Heart of a Woman,4.19,16156.0,https://www.goodreads.com/book/show/5160.The_H...
278,I Liked My Life,4.21,3450.0,https://www.goodreads.com/book/show/29875926-i...
388,Hannah's Gift: Lessons from a Life Fully Lived,4.35,868.0,https://www.goodreads.com/book/show/806124.Han...
514,Lament for a Son,4.42,974.0,https://www.goodreads.com/book/show/148208.Lam...
536,North of Normal: A Memoir of My Wilderness Chi...,4.16,3428.0,https://www.goodreads.com/book/show/18635097-n...
543,My Lovely Wife in the Psych Ward,4.17,1215.0,https://www.goodreads.com/book/show/31371250-m...
559,All But My Life: A Memoir,4.28,14073.0,https://www.goodreads.com/book/show/163363.All...


_____

Genre 7: ['fairy', 'magic', 'adventure', 'world', 'magical', 'die', 'fantasy', 'journey', 'young', 'land']


Unnamed: 0,title,avg_rating,ratings_count,url
136,The War that Saved My Life (The War That Saved...,4.44,21307.0,https://www.goodreads.com/book/show/20912424-t...
142,"The Trials of Morrigan Crow (Nevermoor, #1)",4.52,754.0,https://www.goodreads.com/book/show/34219873-n...
511,Mufaro's Beautiful Daughters: An African Tale,4.32,16896.0,https://www.goodreads.com/book/show/845403.Muf...
731,Grounded: The Adventures of Rapunzel (Tyme #1),4.16,1438.0,https://www.goodreads.com/book/show/23399287-g...
884,The Complete Grimm's Fairy Tales,4.31,117324.0,https://www.goodreads.com/book/show/22917.The_...
1036,"The Pillars of the Earth (Kingsbridge, #1)",4.29,471990.0,https://www.goodreads.com/book/show/5043.The_P...
1079,Uncle Dynamite,4.28,518.0,https://www.goodreads.com/book/show/16241184-u...
1464,"Out from Boneville (Bone, #1)",4.16,63254.0,https://www.goodreads.com/book/show/106134.Out...
1488,The First Rumpole Omnibus,4.3,1278.0,https://www.goodreads.com/book/show/54383.The_...
1593,J.R.R. Tolkien 4-Book Boxed Set: The Hobbit an...,4.59,92172.0,https://www.goodreads.com/book/show/30.J_R_R_T...


_____

