In [124]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import sparse
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import Normalizer
from IPython.display import display
from scipy.sparse.linalg import svds

### 0.1 - Load and Prepare Data

In [125]:
# Load df_books and genres
wd = os.getcwd()
df_books = pd.read_csv(wd + "/data/goodreads_books.csv")
df_inferred_genres = pd.read_csv(wd + "/data/inferred_genres.csv")
genre_descriptors = pd.read_csv(wd + "/data/inferred_genre_top_words.csv")

# Join genres to books
df_books = pd.merge(df_books, df_inferred_genres.drop(columns="title"), how="left", on="book_id")

In [126]:
# Load sparse_reviews from file
df_reviews = sparse.load_npz(wd + "/data/user_reviews.npz")

# Load user (rows) and book (cols) indices
user_index = pd.read_csv(wd + "/data/user_index_for_sparse_matrix.csv").rename(columns={"0":"user_id"})
book_index = pd.read_csv(wd + "/data/book_index_for_sparse_matrix.csv").rename(columns={"0":"book_id"})

In [127]:
# Load my goodreads history
my_books = pd.read_csv(wd + "/data/goodreads_library_export.csv")

# Filter to books I've rated
my_books = my_books[["Book Id", "My Rating", "Title"]]\
    .rename(columns={"Book Id":"book_id", "My Rating":"rating", "Title":"title"}).query("rating > 0")
print(f"Target reader ratings: {my_books.shape[0]}")

# Reformat to fit sparse matrix
my_books_1 = pd.merge(
                my_books, df_books[["book_id", "title"]], 
                how="left", on="book_id", suffixes=["_mb","_dfb"]
            )
my_books_id_match = my_books_1[~my_books_1["title_dfb"].isna()]
print(f"Matching on book_id: {my_books_id_match.shape[0]}")

my_books_2 = pd.merge(
    my_books_1[my_books_1["title_dfb"].isna()], df_books[["book_id", "title"]], 
    how="left", left_on="title_mb", right_on="title", suffixes=["_mb","_dfb"]
    )
my_books_match_on_title = my_books_2[~my_books_2["title"].isna()]
print(f"Matching on title: {my_books_match_on_title.shape[0]}")

my_books_id_match = my_books_id_match[["book_id", "rating", "title_dfb"]]\
                        .rename(columns={"title_dfb":"title"})
my_books_match_on_title = my_books_match_on_title[["book_id_dfb", "rating", "title"]]\
                        .rename(columns={"book_id_dfb":"book_id"})

my_books_3 = pd.concat([my_books_id_match, my_books_match_on_title])
my_books_3 = my_books_3.drop_duplicates(subset="book_id")
print(f"Total matches: {my_books_3.shape[0]}, {(my_books_3.shape[0] / my_books.shape[0]):.2%} of total")

my_books_4 = pd.merge(book_index, my_books_3, how="left", on="book_id")#.fillna(0.)
my_books = np.array(my_books_4["rating"]).reshape(1,-1)

# Add to sparse matrix
df_reviews = sparse.vstack([df_reviews, my_books])

# # Normalize reviews within users
# norm = Normalizer()
# df_reviews = norm.fit_transform(df_reviews) 

Target reader ratings: 166
Matching on book_id: 30
Matching on title: 65
Total matches: 95, 57.23% of total


In [128]:
# Set target reader to make recommendations for
target = df_reviews.shape[0] - 1

#### 0.2 - Functions to Process Recommendations

In [129]:
# Function to use KNN to find similar users
def find_neighbors(n_neighbors, target_user, ratings_df, ind_books, books):

    # Instantiate KNN
    n_neighbors = n_neighbors

    nn_model = NearestNeighbors(
        metric="cosine",
        algorithm="auto",
        n_neighbors=n_neighbors,
        n_jobs=-1
    )

    # Fit to sparse matrix
    nn_model.fit(ratings_df)

    # Feed in user and get neighbors and distances
    reader = ratings_df[target,:].toarray()
    dists, neighbors = nn_model.kneighbors(reader, return_distance=True)

    similar_users = pd.DataFrame(
        [pd.Series(neighbors.reshape(-1)), pd.Series(dists.reshape(-1))]).T.rename(
            columns={0:"user", 1:"distance"}
    )

    # Get all books read by similar users
    book_ind = []
    book_rat = []
    uid = []
    target_user_books = []
    target_user_book_rat = []
    for nt in similar_users.itertuples():
        user = ratings_df[int(nt.user),:].toarray()
        book_inds = np.where(user[0] > 0)[0]
        ratings = user[0][np.where(user[0] > 0)[0]]
        for i in range(len(book_inds)):        
            book_ind.append(book_inds[i])
            book_rat.append(ratings[i])
            uid.append(nt.user)    
            if nt.distance < 0.000000001:
                target_user_books.append(book_inds[i])
                target_user_book_rat.append(ratings[i])

    neighbor_user_ratings = pd.DataFrame([uid, book_ind, book_rat]).T.rename(
                                columns={0:"uid",1:"book_index",2:"user_rating"}
                            )

    # Join overall rating for each book
    neighbor_user_ratings = pd.merge(
                                ind_books.reset_index(), neighbor_user_ratings, 
                                how="inner", left_on="index", right_on="book_index"
                            )
    neighbor_user_ratings = pd.merge(neighbor_user_ratings, books, how="inner", on="book_id")

    # Filter out books target reader has already read
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["book_index"].isin(target_user_books)]
    neighbor_user_ratings.drop(["index"], axis=1, inplace=True)

    # Filter out later volumes in series using regex pattern
    regex1 = r"#([3-9]|[1-9]\d+)"
    regex2 = r"Vol. ([0-9]|[1-9]\d+)"
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]
    neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains("#1-")]

    # View target user's ratings
    target_user_ratings = pd.DataFrame(
                            [target_user_books, target_user_book_rat]).T.rename(
                                columns={0:"book_index",1:"user_rating"}
                        )
    target_user_ratings = pd.merge(
                            ind_books.reset_index(), target_user_ratings,
                            how="inner", left_on="index", right_on="book_index"
                        )
    target_user_ratings = pd.merge(target_user_ratings, books, how="inner", on="book_id")    

    return neighbor_user_ratings, target_user_ratings

In [130]:
# Function to show top recs for each genre
def show_top_recs_by_genre(
        target, others, preds=None, how=["KNN", "MF"], whose_genres=["target", "neighbors"], 
        n_genres=5, min_score=3.9, min_ratings=400, n_recs=15, write_to_excel=False):

    if whose_genres == "target":
        genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    else:
        genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))    
    
    genres = []
    results = []
    # Loop through genres in descending relevance order and print top recs
    for genre in genre_ranking.index[0:n_genres]:        
        
        g = float(genre[6:])
        if how == "KNN":
            highest_rated_recs_genre = others.query("main_genre == @g")\
                .groupby(["title", "avg_rating", "ratings_count", "year", "url"])["book_id"]\
                .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)
        
        elif how == "MF":
            highest_rated_recs_genre = preds.query("main_genre == @g")\
            .sort_values(by="predicted_rating", ascending=False)                

        highest_rated_recs_genre = highest_rated_recs_genre.query(
                                        "ratings_count > @min_ratings & avg_rating > @min_score"
                                    )

        # Print genre name and descriptor
        for nt in genre_descriptors.itertuples():
            genre_rep = genre.replace("_"," ")
            if nt.genre_string[0:len(f"{genre}:")] == f"{genre_rep}:":
                gs = (nt.genre_string)
        
        genres.append(gs)
        results.append(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(50))

        print(gs)
        display(highest_rated_recs_genre[["title", "avg_rating", "ratings_count", "url"]].head(n_recs))
        print("_____\n")
    
    # Write to excel if keyword set
    path = (wd + "/data/book_recs.xlsx")
    if write_to_excel == True:
        if os.path.isfile(path):
            with pd.ExcelWriter(path, engine="openpyxl", mode="a", if_sheet_exists='replace') as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        else:
            with pd.ExcelWriter(path) as writer:
                
                pd.DataFrame(genres).rename(columns={0:"Genres"})\
                    .to_excel(writer, sheet_name="Genre Meanings", index=False)
                
                for i in range(len(results)):
                    results[i].to_excel(writer, sheet_name=f"{genres[i].split(':')[0]} Recs", index=False)
        writer.close

In [131]:
# Functions to show top rated and most popular among similar readers
def neighbors_most_popular(others, n=10, min_ratings=1000, min_score=3.8):
    
    popular_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["book_id", "avg_rating"], ascending=False)\
        .nlargest(n, "book_id").rename(columns={"book_id":"percent_similar_users_read"})

    popular_recs["percent_similar_users_read"] = (popular_recs["percent_similar_users_read"] / 
                                                    others["uid"].nunique()).map('{:.1%}'.format)
    return popular_recs[["title","avg_rating","ratings_count","year","percent_similar_users_read","url"]]


def neighbors_top_rated(others, n=10, min_ratings=1000, min_score=3.8):

    highest_rated_recs = others.query("ratings_count > @min_ratings & avg_rating > @min_score")\
        .groupby(["title", "avg_rating", "ratings_count", "year","url"])["book_id"]\
        .count().reset_index().sort_values(by=["avg_rating", "book_id"], ascending=False)\
        .nlargest(n, "avg_rating").drop(columns="book_id")

    return highest_rated_recs

In [132]:
# Function to plot neighbors' and target's top genres
def plot_top_genres(others, target):
    
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12,10))
    neighbor_genre_ranking = pd.DataFrame(others.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    target_genre_ranking = pd.DataFrame(target.loc[:, "Genre_1":].sum(axis=0).sort_values(ascending=False))
    
    sns.barplot(
        data=neighbor_genre_ranking, y=neighbor_genre_ranking.index, x=0, ax=ax[0]
    )
    ax[0].set_xlabel("Genre preference")
    ax[0].set_title("Neighbors' Top Genres")

    # Plot target reader's top genres    
    sns.barplot(
        data=target_genre_ranking, y=target_genre_ranking.index, x=0, ax=ax[1]
    )
    ax[1].set_xlabel("Genre preference")
    ax[1].set_title("Target User's Top Genres")
    
    plt.show()

## TESTING using gradient descent for matrix factorization 

In [133]:
# Find larger number of similar users than before to get broad pool of potentially relevant books
df_reviews.data[np.isnan(df_reviews.data)] = 0
df_reviews = sparse.csr_matrix(df_reviews)
large_neighborhood_ratings, target_user_ratings = find_neighbors(500, target, df_reviews, book_index, df_books)

# Get unique users and books to slice df_reviews
neighbor_index = large_neighborhood_ratings["uid"].unique()
neighbor_index = np.append(neighbor_index, target)
neighbor_book_index = large_neighborhood_ratings["book_index"].unique()
neighbor_book_index = np.append(neighbor_book_index, target_user_ratings["book_index"].unique())

# Slice df_reviews to make User Ratings Matrix
df_reviews.data[df_reviews.data == 0] = np.nan
df_reviews = sparse.csr_matrix(df_reviews)
R = df_reviews[:, neighbor_book_index]
R = R[neighbor_index, :].toarray()

  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex1)]
  neighbor_user_ratings = neighbor_user_ratings[~neighbor_user_ratings["title"].str.contains(regex2)]


In [134]:
# def matrix_factorization1(R, k=5, steps=4000, lr=0.0002, beta=0.02, verbose=0):
#     """
#     Uses gradient descent to train matrix factorization algorithm. Useful explanation
#     of the math here: 
#     http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

#     Args:
#         R: Utility matrix of user item ratings         
#         k: Number of latent features
#         steps: Number of epochs to run training
#         lr: Learning rate
#         beta: Regularization strength
#         verbose: Prints updates if 1

#     Returns:
#         Predictions, User Embeddings, Item Embdeddings
#     """
#     # Initialize random values for user (P) and book (Q) embedding matrices
#     P = np.random.rand(R.shape[0],K)
#     Q = np.random.rand(R.shape[1],K).T
    
#     for step in range(steps): # For each steps
#         r_0 = np.where(R > 0) # Find indices where rating > 0
#         e = 0 # Initialize error for step to 0
#         for n in range(len(r_0[0])): # For each value where rating > 0
#             i = r_0[0][n] # Get the row index where the value is
#             j = r_0[1][n] # Get the column index where the value is
#             resid = R[i][j] - np.dot(P[i,:],Q[:,j]) # residual is value - r_hat
#             e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2) # Add to error
#             for k in range(K): # for each component (latent feature)       
#                 P[i][k] = P[i][k] + lr * (2 * resid * Q[k][j] - beta * P[i][k]) # Update P by step in gradient
#                 Q[k][j] = Q[k][j] + lr * (2 * resid * P[i][k] - beta * Q[k][j]) # Update Q by step in gradient
#                 e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2)) # Update error with regularized term

#         # View progress
#         if verbose == 1:
#             if step % (steps / 10) == 0:
#                 print(f"Step: {step}, error: {e}")
        
#         # Break loop if error is very small
#         if e < 0.001:
#             break
    
#     # Get predictions
#     preds = np.dot(P, Q)

#     return preds, P, Q.T

In [194]:
# Function to train embedding matrices using alternating least squares (ALS) and make predictions 
def matrix_factorization(R, k_components=5, steps=4000, lr=0.0002, beta=0.05, decay=0.95, verbose=0):
    """
    Uses gradient descent to train matrix factorization algorithm. Useful explanation
    of the math here: 
    http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/#source-code

    Args:
        R: Utility matrix of user item ratings         
        k: Number of latent features
        steps: Number of epochs to run training
        lr: Learning rate
        beta: Regularization strength
        verbose: Prints updates if 1

    Returns:
        Predictions, User Embeddings, Item Embdeddings
    """

    # Initialize random values for user (P) and book (Q) embedding matrices    
    P = np.random.rand(R.shape[0],k_components)
    Q = np.random.rand(R.shape[1],k_components).T

    # Get nonzero ratings to perform operations on
    r_0 = np.where(R > 0) # Find indices where rating > 0
    i = r_0[0] # Get row indices where rating > 0
    j = r_0[1] # Get column indices where rating > 0 
    nonzero_ratings = R[i,j]

    # For each step calc residual and take step down gradient
    for step in range(steps): 
        resid = nonzero_ratings - (P @ Q)[i,j] # residual is value - r_hat       
        for k in range(k_components): # for each component (latent feature)                
            P[i,k] = P[i,k] + lr * (2 * resid * Q[k,j] - beta * P[i,k]) # Update P by step in gradient                             
            Q[k,j] = Q[k,j] + lr * (2 * resid * P[i,k] - beta * Q[k,j]) # Update Q by step in gradient    

        # Calculate error every 1k steps                  
        if ((step + 1) % 1000 == 0) | (step == 0): # Only do every 1k steps
            error = 0 # Initialize counter for error  
            error = error + np.sum(pow(resid, 2)) # Add to error
            for k in range(k_components): # for each component (latent feature)                             
                error = error + (beta/2) * np.sum(pow(P[i,k],2) + pow(Q[k,j],2)) # Add regularized term 

            # View progress
            if verbose == 1:
                print(f"Step: {step + 1}, error: {error:.2f}, lr: {lr:.6f}") 
            
            # Set learning rate to decay until 0.00002
            if lr > 0.00002:            
                # lr = (1 / (1 + decay * (step))) * lr 
                lr = lr * decay
    
    # Get predictions
    preds = np.dot(P, Q)

    return preds, P, Q.T

In [208]:
preds, user_embeddings, item_embeddings = matrix_factorization(R, k_components=100, lr=0.0005, beta=0.1, steps=10000, verbose=1)

Step: 1, error: 1099942.36, lr: 0.000500
Step: 1000, error: 9569.98, lr: 0.000475
Step: 2000, error: 8224.36, lr: 0.000451
Step: 3000, error: 7153.62, lr: 0.000429
Step: 4000, error: 6294.24, lr: 0.000407
Step: 5000, error: 5598.85, lr: 0.000387
Step: 6000, error: 5031.78, lr: 0.000368
Step: 7000, error: 4565.95, lr: 0.000349
Step: 8000, error: 4180.60, lr: 0.000332
Step: 9000, error: 3859.71, lr: 0.000315
Step: 10000, error: 3590.82, lr: 0.000299


In [209]:
# Make dataframe from predictions and indices
df_preds = pd.DataFrame(preds, columns=neighbor_book_index, index=neighbor_index)\
                .reset_index()

# Get target user's top predicted ratings
target_pred_books = df_preds[df_preds["index"] == target].columns[1:]
target_pred_ratings = df_preds[df_preds["index"] == target].values[0][1:]

# Put into df with relevant info from df_books
top_preds = pd.DataFrame({"book_index":target_pred_books, "predicted_rating":target_pred_ratings})\
                .sort_values(by="predicted_rating", ascending=False)\
                .merge(book_index.reset_index(), left_on="book_index", right_on="index")\
                .merge(
                    df_books[["book_id", "title", "avg_rating", "ratings_count", "year", "main_genre","url"]],
                    on="book_id"
                )\
                .drop(columns=["index"])

# View preds on actually read books
target_books_preds = top_preds[top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]\
                        .drop(["book_index"], axis=1)
target_books_preds = pd.merge(target_books_preds, target_user_ratings[["book_id", "user_rating"]], on="book_id")
target_books_preds["diff"] = target_books_preds["predicted_rating"] - target_books_preds["user_rating"]
print("Predictions on Actually Read Books")
display(target_books_preds[["title", "predicted_rating", "user_rating", "diff"]])

# Filter out already read books to get top preds
print("___________\n")
print("Predictions on Unread Books")
top_preds = top_preds[~top_preds["book_index"].isin(target_user_ratings["book_index"].unique())]
display(top_preds[["title","avg_rating","predicted_rating","ratings_count","year","url"]]\
        .query("avg_rating > 3.9").head(20)
    )

Predictions on Actually Read Books


Unnamed: 0,title,predicted_rating,user_rating,diff
0,Miles: The Autobiography,4.963302,5.0,-0.036698
1,"The Devil's Chessboard: Allen Dulles, the CIA,...",4.963243,5.0,-0.036757
2,"The Fifth Season (The Broken Earth, #1)",4.963189,5.0,-0.036811
3,Sapiens: A Brief History of Humankind,4.963188,5.0,-0.036812
4,Born a Crime: Stories From a South African Chi...,4.963179,5.0,-0.036821
...,...,...,...,...
90,Fear and Loathing in Las Vegas,1.985985,2.0,-0.014015
91,The Sympathizer,1.985964,2.0,-0.014036
92,The One Thing: The Surprisingly Simple Truth B...,1.985950,2.0,-0.014050
93,"Casino Royale (James Bond, #1)",0.995555,1.0,-0.004445


___________

Predictions on Unread Books


Unnamed: 0,title,avg_rating,predicted_rating,ratings_count,year,url
0,Story Genius: How to Use Brain Science to Go B...,4.28,8.658874,385.0,2016,https://www.goodreads.com/book/show/27833542-s...
4,All Who Go Do Not Return: A Memoir,4.22,8.259791,1980.0,2015,https://www.goodreads.com/book/show/22244929-a...
5,Incidences,4.32,8.201976,658.0,2007,https://www.goodreads.com/book/show/685172.Inc...
6,Parliament of Whores: A Lone Humorist Attempts...,4.02,8.190287,2569.0,1991,https://www.goodreads.com/book/show/44781.Parl...
7,The Elephant Whisperer,4.44,8.100394,6905.0,2009,https://www.goodreads.com/book/show/6375561-th...
8,"Gateway (Heechee Saga, #1)",4.07,8.099785,30282.0,2004,https://www.goodreads.com/book/show/218427.Gat...
9,City of Saints & Thieves,3.95,8.066592,1705.0,2017,https://www.goodreads.com/book/show/33956433-c...
11,Alice in Wonderland and the World Trade Center...,3.97,8.01839,241.0,2002,https://www.goodreads.com/book/show/240887.Ali...
12,Teach Me to Forget,3.94,7.951542,817.0,2016,https://www.goodreads.com/book/show/29526341-t...
13,"Option B: Facing Adversity, Building Resilienc...",3.94,7.910342,14348.0,2017,https://www.goodreads.com/book/show/32938155-o...


In [210]:
# Highest rated books by each of the top genres
show_top_recs_by_genre(
    target_user_ratings, 
    large_neighborhood_ratings, 
    preds=top_preds, 
    how="MF", 
    whose_genres="target", 
    n_genres=10, 
    min_score=4.15, 
    min_ratings=400, 
    n_recs=20,
    write_to_excel=False
)

Genre 5: ['history', 'war', 'political', 'account', 'world', 'year', 'country', 'family', 'century', 'people']


Unnamed: 0,title,avg_rating,ratings_count,url
126,Daring to Drive: A Saudi Woman’s Awakening,4.32,871.0,https://www.goodreads.com/book/show/32620356-d...
140,The Girl with Seven Names: A North Korean Defe...,4.45,8293.0,https://www.goodreads.com/book/show/25362017-t...
148,Ratf**ked: The True Story Behind The Secret Pl...,4.16,438.0,https://www.goodreads.com/book/show/26889647-r...
164,"American Tabloid (Underworld USA, #1)",4.21,10342.0,https://www.goodreads.com/book/show/36064.Amer...
190,A Fighting Chance,4.17,8740.0,https://www.goodreads.com/book/show/18779662-a...
196,Fortune is a Woman,4.21,992.0,https://www.goodreads.com/book/show/355715.For...
204,The Silk Roads: A New History of the World,4.25,3022.0,https://www.goodreads.com/book/show/25812847-t...
231,Arch of Triumph: A Novel of a Man Without a Co...,4.4,11703.0,https://www.goodreads.com/book/show/672948.Arc...
290,Open,4.18,48230.0,https://www.goodreads.com/book/show/6480781-open
291,The Great Dune Trilogy,4.36,52034.0,https://www.goodreads.com/book/show/53764.The_...


_____

Genre 15: ['history', 'poem', 'essay', 'text', 'century', 'collection', 'world', 'writing', 'introduction', 'literature']


Unnamed: 0,title,avg_rating,ratings_count,url
24,"The Unreal and the Real: Selected Stories, Vol...",4.23,459.0,https://www.goodreads.com/book/show/13591879-t...
73,Natasha's Dance: A Cultural History of Russia,4.16,2039.0,https://www.goodreads.com/book/show/97401.Nata...
77,Sounds Like Me: My Life (So Far) in Song,4.26,3867.0,https://www.goodreads.com/book/show/25434361-s...
106,Collected Writings: Common Sense/The Crisis/Ri...,4.24,1503.0,https://www.goodreads.com/book/show/99953.Coll...
149,Woman in the Mists: The Story of Dian Fossey a...,4.16,2955.0,https://www.goodreads.com/book/show/546693.Wom...
194,"Notes from Underground, White Nights, The Drea...",4.17,66680.0,https://www.goodreads.com/book/show/17876.Note...
245,Mama Ruby,4.25,1376.0,https://www.goodreads.com/book/show/9475835-ma...
248,Her Smoke Rose Up Forever,4.22,2539.0,https://www.goodreads.com/book/show/27059.Her_...
295,Gaviotas: A Village to Reinvent the World,4.17,555.0,https://www.goodreads.com/book/show/136893.Gav...
346,The World of Ice & Fire: The Untold History of...,4.26,12952.0,https://www.goodreads.com/book/show/17345242-t...


_____

Genre 17: ['alien', 'planet', 'world', 'human', 'ship', 'space', 'war', 'crew', 'mission', 'race']


Unnamed: 0,title,avg_rating,ratings_count,url
90,"Spellbound (Grimnoir Chronicles, #2)",4.29,7184.0,https://www.goodreads.com/book/show/10822283-s...
159,"Wolf by Wolf (Wolf by Wolf, #1)",4.28,9710.0,https://www.goodreads.com/book/show/24807186-w...
303,Foundation / Foundation and Empire / Second Fo...,4.35,3316.0,https://www.goodreads.com/book/show/278097.Fou...
338,The Evolutionary Void,4.26,13729.0,https://www.goodreads.com/book/show/7438179-th...
372,The Last Rhinos: My Battle to Save One of the ...,4.34,1094.0,https://www.goodreads.com/book/show/13167157-t...
385,The Deep: The Extraordinary Creatures of the A...,4.28,901.0,https://www.goodreads.com/book/show/442668.The...
648,Leviathan Wakes (Expanse #1),4.2,1998.0,https://www.goodreads.com/book/show/9533361-le...
866,The Hyperion Omnibus: Hyperion / The Fall of H...,4.41,2254.0,https://www.goodreads.com/book/show/3943.The_H...
876,"Deep: Freediving, Renegade Science, and What t...",4.4,1144.0,https://www.goodreads.com/book/show/18222705-deep
898,"Caliban's War (The Expanse, #2)",4.29,41459.0,https://www.goodreads.com/book/show/12591698-c...


_____

Genre 8: ['guide', 'people', 'way', 'help', 'practical', 'world', 'experience', 'offer', 'question', 'business']


Unnamed: 0,title,avg_rating,ratings_count,url
23,The Shock Doctrine: The Rise of Disaster Capit...,4.23,664.0,https://www.goodreads.com/book/show/2171147.Th...
39,"Autism's False Prophets: Bad Science, Risky Me...",4.18,987.0,https://www.goodreads.com/book/show/3360358-au...
46,The (7L) The Seven Levels of Communication: Go...,4.33,482.0,https://www.goodreads.com/book/show/9858250-th...
47,The Book on the Taboo Against Knowing Who You Are,4.28,10108.0,https://www.goodreads.com/book/show/60551.The_...
59,There Is No Good Card for This: What To Say an...,4.31,547.0,https://www.goodreads.com/book/show/28257714-t...
99,Algorithms to Live By: The Computer Science of...,4.16,4639.0,https://www.goodreads.com/book/show/25666050-a...
165,Ready to Run: Unlocking Your Potential to Run ...,4.29,772.0,https://www.goodreads.com/book/show/18668429-r...
176,The Power of Vulnerability: Teachings of Authe...,4.57,3778.0,https://www.goodreads.com/book/show/23500254-t...
239,Calm: Calm the Mind. Change the World,4.17,580.0,https://www.goodreads.com/book/show/25194140-calm
241,Impro,4.27,1653.0,https://www.goodreads.com/book/show/306940.Impro


_____

Genre 11: ['fiction', 'collection', 'world', 'literary', 'young', 'funny', 'short', 'comedy', 'horror', 'debut']


Unnamed: 0,title,avg_rating,ratings_count,url
5,Incidences,4.32,658.0,https://www.goodreads.com/book/show/685172.Inc...
138,Asterios Polyp,4.19,20053.0,https://www.goodreads.com/book/show/4070095-as...
267,A Manual for Cleaning Women: Selected Stories,4.17,6863.0,https://www.goodreads.com/book/show/22929586-a...
400,A Supposedly Fun Thing I'll Never Do Again: E...,4.28,23746.0,https://www.goodreads.com/book/show/6748.A_Sup...
556,Sherlock Holmes: The Complete Novels and Stori...,4.45,18729.0,https://www.goodreads.com/book/show/3581.Sherl...
1289,The Essential Dykes to Watch Out For,4.36,6979.0,https://www.goodreads.com/book/show/3189884-th...
1349,"Beautiful Outlaw: Experiencing the Playful, Di...",4.31,2633.0,https://www.goodreads.com/book/show/12079560-b...


_____

Genre 22: ['world', 'power', 'enemy', 'war', 'battle', 'evil', 'magic', 'save', 'ancient', 'warrior']


Unnamed: 0,title,avg_rating,ratings_count,url
69,The Sword of Summer (Magnus Chase and The Gods...,4.26,921.0,https://www.goodreads.com/book/show/25909686-t...
123,"Legend (The Drenai Saga, #1)",4.25,18805.0,https://www.goodreads.com/book/show/618177.Legend
137,Lost Boy: The True Story of Captain Hook,4.2,1420.0,https://www.goodreads.com/book/show/32828538-l...
157,"The Last Namsara (Iskari, #1)",4.25,865.0,https://www.goodreads.com/book/show/32667458-t...
288,"Everwild (Skinjacker, #2)",4.21,9779.0,https://www.goodreads.com/book/show/6390465-ev...
312,When Christ and His Saints Slept (Henry II an...,4.27,10892.0,https://www.goodreads.com/book/show/43841.When...
329,"Rebel Spring (Falling Kingdoms, #2)",4.16,521.0,https://www.goodreads.com/book/show/18668021-r...
341,The Cartel,4.29,9961.0,https://www.goodreads.com/book/show/23602561-t...
363,The Hammer of Thor (Magnus Chase and the Gods ...,4.37,26454.0,https://www.goodreads.com/book/show/27904311-t...
443,Time and Chance (Henry II & Eleanor of Aquitai...,4.28,6106.0,https://www.goodreads.com/book/show/234560.Tim...


_____

Genre 3: ['collection', 'comic', 'short', 'fan', 'feature', 'bestselling', 'adventure', 'available', 'classic', 'includes']


Unnamed: 0,title,avg_rating,ratings_count,url
71,Y: The Last Man - The Deluxe Edition Book One,4.36,10549.0,https://www.goodreads.com/book/show/3710603-y
215,Walt Disney Imagineering: A Behind the Dreams ...,4.23,1279.0,https://www.goodreads.com/book/show/380248.Wal...
221,A Good Man is Hard to Find and Other Stories,4.24,26384.0,https://www.goodreads.com/book/show/48464.A_Go...
237,Calvin and Hobbes,4.61,120371.0,https://www.goodreads.com/book/show/77727.Calv...
266,"A History of the English Speaking Peoples, 4 Vols",4.31,767.0,https://www.goodreads.com/book/show/1644043.A_...
376,The Paper Menagerie,4.37,2476.0,https://www.goodreads.com/book/show/13501210-t...
399,Adventure Time With Fionna and Cake,4.41,2542.0,https://www.goodreads.com/book/show/17348705-a...
455,Saga: Book One,4.7,4201.0,https://www.goodreads.com/book/show/22078240-saga
513,The Complete Clive Barker's The Great And Secr...,4.21,15868.0,https://www.goodreads.com/book/show/34871.The_...
522,"Harry Potter: The Prequel (Harry Potter, #0.5)",4.22,13267.0,https://www.goodreads.com/book/show/8933944-ha...


_____

Genre 23: ['agent', 'team', 'mission', 'job', 'dead', 'terrorist', 'zombie', 'government', 'secret', 'drug']


Unnamed: 0,title,avg_rating,ratings_count,url
615,Shadows for Silence in the Forests of Hell,4.16,5683.0,https://www.goodreads.com/book/show/21411388-s...
708,Harrison Bergeron,4.23,10690.0,https://www.goodreads.com/book/show/10176119-h...
758,"The Cartel (The Cartel, #1)",4.58,5703.0,https://www.goodreads.com/book/show/5953477-th...


_____

Genre 20: ['family', 'year', 'child', 'young', 'home', 'past', 'world', 'loss', 'secret', 'friend']


Unnamed: 0,title,avg_rating,ratings_count,url
4,All Who Go Do Not Return: A Memoir,4.22,1980.0,https://www.goodreads.com/book/show/22244929-a...
28,The Chronology of Water,4.2,4956.0,https://www.goodreads.com/book/show/9214995-th...
91,Lament for a Son,4.42,974.0,https://www.goodreads.com/book/show/148208.Lam...
191,"Far from the Tree: Parents, Children, and the ...",4.27,12131.0,https://www.goodreads.com/book/show/13547504-f...
268,I Liked My Life,4.21,3450.0,https://www.goodreads.com/book/show/29875926-i...
384,These Is My Words: The Diary of Sarah Agnes Pr...,4.34,44889.0,https://www.goodreads.com/book/show/348225.The...
411,"Murderville (Murderville, #1)",4.57,2120.0,https://www.goodreads.com/book/show/9805946-mu...
433,"Good Night, Mr. Tom",4.28,25271.0,https://www.goodreads.com/book/show/161099.Goo...
482,I Have Lived a Thousand Years,4.16,16409.0,https://www.goodreads.com/book/show/48834.I_Ha...
528,The Outside Circle: A Graphic Novel,4.23,875.0,https://www.goodreads.com/book/show/23130299-t...


_____

Genre 7: ['fairy', 'magic', 'adventure', 'world', 'magical', 'die', 'fantasy', 'journey', 'young', 'land']


Unnamed: 0,title,avg_rating,ratings_count,url
80,Mufaro's Beautiful Daughters: An African Tale,4.32,16896.0,https://www.goodreads.com/book/show/845403.Muf...
299,The War that Saved My Life (The War That Saved...,4.44,21307.0,https://www.goodreads.com/book/show/20912424-t...
300,The First Rumpole Omnibus,4.3,1278.0,https://www.goodreads.com/book/show/54383.The_...
322,Uncle Dynamite,4.28,518.0,https://www.goodreads.com/book/show/16241184-u...
612,"The Pillars of the Earth (Kingsbridge, #1)",4.29,471990.0,https://www.goodreads.com/book/show/5043.The_P...
704,Bridge of Birds (The Chronicles of Master Li a...,4.3,8063.0,https://www.goodreads.com/book/show/15177.Brid...
1075,"Toby Alone (Tobie Lolness, #1)",4.18,1467.0,https://www.goodreads.com/book/show/2535732.To...
1129,Alice in Wonderland: Based on the Motion Pictu...,4.19,2221.0,https://www.goodreads.com/book/show/7052617-al...
1520,"The Lion, the Witch, and the Wardrobe (Chronic...",4.19,1575387.0,https://www.goodreads.com/book/show/100915.The...


_____

