# Retrival Augmented Generation (RAG) with Book Recommendations

### Getting ready

Importing libraries

In [1]:
import faiss
from sentence_transformers import SentenceTransformer

import pandas as pd
import numpy as np

Loading the dataset

In [3]:
df = pd.read_csv(r'..\assets\datasets\preprocessed_with_tags.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41617 entries, 0 to 41616
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   bookId            41617 non-null  object 
 1   title             41617 non-null  object 
 2   series            20420 non-null  object 
 3   author            41617 non-null  object 
 4   rating            41617 non-null  float64
 5   description       41617 non-null  object 
 6   language          41617 non-null  object 
 7   isbn              41617 non-null  object 
 8   genres            41617 non-null  object 
 9   characters        41617 non-null  object 
 10  bookFormat        41234 non-null  object 
 11  edition           3773 non-null   object 
 12  pages             40464 non-null  object 
 13  publisher         39307 non-null  object 
 14  publishDate       41298 non-null  object 
 15  firstPublishDate  25401 non-null  object 
 16  awards            41617 non-null  object

Loading the embeddings

In [4]:
embeddings = np.load(r"..\assets\models\book_embeddings.npy")

Loading the `faiss` index

In [5]:
index = faiss.read_index(r"..\assets\models\book_index.faiss")

Initiating `SentenceTransformer` model

In [7]:
model = SentenceTransformer('all-MiniLM-L6-v2')

### Main function

In [16]:
def recommend_from_query(
    query: str,
    k: int=5
) -> list[dict]:
    """
    Recommend books based on a query string.

    Args:
        query (str): The query string to search for.
        k (int): The number of recommendations to return.
    
    Returns:
        list[dict]: A list of dictionaries containing book recommendations.
    """

    # Encoding query into embedding
    query_embedding = model.encode(query)

    # Search the FAISS index
    distances, indices = index.search(np.array([query_embedding]), k)

    # Return results as list of dictionaries
    return df.iloc[indices[0][1:]].to_dict(orient='records')


In [23]:
queries = [
        "Recommend me a book about hacking and cybersecurity"
    ]
    
# Get recommendations for each query
for query in queries:
    print(f"Query: '{query}'\n")
    similar_books = recommend_from_query(query)
    
    for book in similar_books:
        print(f"Title: {book['title']}")
        print(f"Author: {book['author']}")
        print(f"Description: {book['description']}")
        print('\n')

    print('-' * 50)

Query: 'Recommend me a book about hacking and cybersecurity'

Title: The Hacker Crackdown: Law and Disorder on the Electronic Frontier
Author: Bruce Sterling
Description: A journalist investigates the past, present, and future of computer crimes, as he attends a hacker convention, documents the extent of the computer crimes, and presents intriguing facts about hackers and their misdoings.


Title: The Hackers Code: The Great Prison Escape Project And Ways to Fly High in Life
Author: A.K. Mars (Goodreads Author)
Description: The Hackers Code is a great work of non-fiction inspirational book. In fact it is not just inspirational book but a lot to do with many essential parts of life.This book “The Hackers Code” is written to teach people that many of us are living life in some kind of prison. and this prison is present in our mind in the form of our belief, thoughts, behaviors and emotions. Many of us have dreams of our own and we give up on those things. and there are reasons for those.

  return forward_call(*args, **kwargs)
