## Quote Search

In [2]:
from sentence_transformers import SentenceTransformer, util
import torch

import pandas as pd
import numpy as np
import os

In [3]:
def print_quote_by_index(idx, df):
    """
    Print one quote and its author, looked up by row position.

    idx is passed to ``df.iloc``; the selected row's ``quote`` is printed in
    double quotes on the first line, followed by `` - <author>`` and a blank
    line.
    """
    row = df.iloc[idx]
    quoted_text = f'"{row.quote}"'
    attribution = f' - {row.author}\n'
    print(quoted_text, attribution, sep='\n')

In [4]:
# Load the quotes table. print_quote_by_index reads the `quote` and `author`
# columns of the frame it is given, and the search cell passes this frame in.
# NOTE(review): rows are looked up by position using indices into the
# embedding matrix, so row i here presumably corresponds to embedding row i —
# confirm the CSV and the embeddings were produced in the same order.
quotes_df = pd.read_csv('quotes_clean.csv')

In [5]:
# Load every embedding shard from disk and stack them into a single matrix.
EMBEDDINGS_DIR = 'SentenceBERT_embeddings'

# Shards are stacked in lexicographic filename order.
# NOTE(review): the search cell uses row positions of this matrix to index
# quotes_df, so this order presumably has to match the order in which the
# shards were written (e.g. zero-padded part numbers) — confirm.
embedding_files = sorted(os.listdir(EMBEDDINGS_DIR))

# Fail with a clear message instead of an AttributeError on `None.shape`.
if not embedding_files:
    raise FileNotFoundError(f'No embedding files found in {EMBEDDINGS_DIR!r}')

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code; keep it only if the shards come from a trusted
# source and actually need it.
embedding_parts = [
    np.load(os.path.join(EMBEDDINGS_DIR, file_name), allow_pickle=True)
    for file_name in embedding_files
]

# Stack once at the end: calling np.vstack inside the loop re-copies the
# whole accumulated matrix for every shard. A single shard is used as-is,
# matching the result of the incremental version.
if len(embedding_parts) == 1:
    embedding_matrix = embedding_parts[0]
else:
    embedding_matrix = np.vstack(embedding_parts)

# Drop the per-shard references so only the stacked matrix stays in memory.
del embedding_parts

print(f'Embeddings loaded. Matrix shape: {embedding_matrix.shape}')

Embeddings loaded. Matrix shape: (499708, 384)


In [15]:
# Embed a free-text query and print the stored quotes most similar to it.
QUERY_TEXT = 'the meaning of life is to love'
TOP_K = 5  # number of quotes to print

# NOTE(review): presumably the same model that produced the stored
# embeddings (they are 384-dimensional per the load cell's output) — confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')

input_embedding = model.encode(QUERY_TEXT)

# Cosine similarity between the query and every row of embedding_matrix.
similarity_scores = util.cos_sim(input_embedding, embedding_matrix)

# torch.topk raises when k exceeds the size of the searched dimension, so
# clamp it to the number of available candidates.
values, indices = torch.topk(
    similarity_scores,
    k=min(TOP_K, similarity_scores.shape[-1]),
)

# Only one query was encoded, so the hits are in the first (only) row.
for idx in indices[0].tolist():
    print_quote_by_index(idx, quotes_df)

"What is life for? Life is for love."
 - Debasish Mridha

"To love life is to love all of it."
 - Erin Fall Haskell

"To love life is to love God."
 - Leo Tolstoy

"To love life is to love all of life; the suffering, the joy, the pain, the magnificence...all of it!"
 - Erin Fall Haskell

"We are born to be loved, we live to be loved, and we die to be loved; so life is for love."
 - Debasish Mridha

