In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

df = pd.read_csv('../data/books_summary.csv')
bert_model = SentenceTransformer('all-MiniLM-L6-v2')

df['BERT_Vector'] = df['Summary'].apply(lambda x: bert_model.encode(str(x)))

  from tqdm.autonotebook import tqdm, trange


In [2]:
from sklearn.metrics.pairwise import cosine_similarity

bert_matrix = np.vstack(df['BERT_Vector'].values)

bert_cosine_sim = cosine_similarity(bert_matrix, bert_matrix)

book_indices = pd.Series(df.index, index=df['Title'])

book_indices


Title
The Hunger Games (The Hunger Games, #1)                          0
Harry Potter and the Order of the Phoenix (Harry Potter, #5)     1
Pride and Prejudice                                              2
To Kill a Mockingbird                                            3
The Book Thief                                                   4
                                                                ..
Angela’s Ashes (Frank McCourt, #1)                              95
Interview with the Vampire (The Vampire Chronicles, #1)         96
Don Quixote                                                     97
The Old Man and the Sea                                         98
The Poisonwood Bible                                            99
Length: 100, dtype: int64

In [3]:
def recommend_book_bert(title, top_n=5):
  idx = book_indices[title]
  sim_scores = list(enumerate(bert_cosine_sim[idx]))

  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1: top_n + 1]

  book_indices_list = [i[0] for i in sim_scores]

  return df.iloc[book_indices_list][['Title', 'Author']]

print(recommend_book_bert('The Great Gatsby'))

                       Title           Author
85  A Tree Grows in Brooklyn     Betty  Smith
33           Charlotte’s Web       E.B. White
3      To Kill a Mockingbird       Harper Lee
87     To Kill a Mockingbird       Harper Lee
44        The Princess Bride  William Goldman
