# Retrival Augmented Generation (RAG) with Book Recommendations

### Getting ready

Importing libraries

In [1]:
import faiss
from sentence_transformers import SentenceTransformer

import pandas as pd
import numpy as np

Loading the dataset

In [3]:
df = pd.read_csv(r'..\assets\datasets\preprocessed_with_tags.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41617 entries, 0 to 41616
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   bookId            41617 non-null  object 
 1   title             41617 non-null  object 
 2   series            20420 non-null  object 
 3   author            41617 non-null  object 
 4   rating            41617 non-null  float64
 5   description       41617 non-null  object 
 6   language          41617 non-null  object 
 7   isbn              41617 non-null  object 
 8   genres            41617 non-null  object 
 9   characters        41617 non-null  object 
 10  bookFormat        41234 non-null  object 
 11  edition           3773 non-null   object 
 12  pages             40464 non-null  object 
 13  publisher         39307 non-null  object 
 14  publishDate       41298 non-null  object 
 15  firstPublishDate  25401 non-null  object 
 16  awards            41617 non-null  object

Loading the embeddings

In [4]:
embeddings = np.load(r"..\assets\models\book_embeddings.npy")

Loading the `faiss` index

In [5]:
index = faiss.read_index(r"..\assets\models\book_index.faiss")

Initiating `SentenceTransformer` model

In [7]:
model = SentenceTransformer('all-MiniLM-L6-v2')

### Main function

In [16]:
def recommend_from_query(
    query: str,
    k: int=5
) -> list[dict]:
    """
    Recommend books based on a query string.

    Args:
        query (str): The query string to search for.
        k (int): The number of recommendations to return.
    
    Returns:
        list[dict]: A list of dictionaries containing book recommendations.
    """

    # Encoding query into embedding
    query_embedding = model.encode(query)

    # Search the FAISS index
    distances, indices = index.search(np.array([query_embedding]), k)

    # Return results as list of dictionaries
    return df.iloc[indices[0][1:]].to_dict(orient='records')


In [None]:
queries = [
        "fantasy adventure with magic and dragons",
        "romantic comedy set in modern times",
        "psychological thriller with unreliable narrator",
        "historical fiction about World War II",
        "science fiction space exploration"
    ]
    
# Get recommendations for each query
for query in queries:
    print(f"Query: '{query}'\n")
    similar_books = recommend_from_query(query, k=3)
    
    for book in similar_books:
        print(f"Title: {book['title']}")
        print(f"Author: {book['author']}")
        print(f"Description: {book['description']}")
        print('\n')

    print('-' * 50)

Query: 'fantasy adventure with magic and dragons'

Title: The Coming of Dragons
Author: A.J. Lake
Description: 
  Edmund, a king’s son in disguise, and Elspeth, a sea captain’s tomboy daughter, are the only two survivors of a terrible shipwreck. They just want to go home, but fate has other plans as they are drawn into the fight against an evil warlord terrorizing their homeland. Accompanied by a mysterious minstrel and haunted by magical powers they did not seek, Edmund and Elspeth journey across a savage land of wild boars, fierce rogue knights, and black magic. Fantasy fans will devour this dramatic tale of mystery, wonder, and the power of friendship.



Title: Dragon Age: The World of Thedas Volume 1
Author: David Gaider, Ben Gelinas (Goodreads Author), Mike Laidlaw, Dave Marshall (Editor), Various (Illustrations)
Description: For Dragon Age newcomers, this comprehensive volume brings you up to speed on everything you need to know about the regions, religions, monsters, magic, and

  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
