# AyurBot: Using RAG Based LLM

In [7]:
import os
from dotenv import load_dotenv

# Load the environment variables
load_dotenv()

MONGODB_CONNECTION_STRING = os.getenv("MONGODB_URI")
MONGODB_DATABASE = os.getenv("MONGODB_DATABASE")
MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION")
API_TOKEN = os.getenv("API_TOKEN")


## Text extraction from Book

In [None]:
# Function to extract the data from the file
import re
import fitz  # PyMuPDF

# Open the PDF file
pdf_document = fitz.open('/Users/mrinoyb2/git/AyurBot/Data/pdf/Ayurveda_Book.pdf')

# Function to preprocess and clean text
def preprocess_text_mupdf(text):
    # Remove headers/footers
    text = re.sub(r'\n\s*\n', '\n', text)  # Remove empty lines
    text = re.sub(r'[^A-Za-z0-9.,;:!?()\'\"\n]+', ' ', text)  # Remove special characters but keep punctuation
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with single space
    return text.strip()

# Extract and clean text
cleaned_text_mupdf = ""
for page_number in range(pdf_document.page_count):
    page = pdf_document.load_page(page_number)
    text = page.get_text()
    cleaned_text_mupdf += preprocess_text_mupdf(text)

# Close the PDF document
pdf_document.close()

# Output the first 
print(cleaned_text_mupdf)

# Save the cleaned text to a file
with open('/Users/mrinoyb2/git/AyurBot/Data/clean_text/Ayurveda_Book.txt', 'w') as file:
    file.write(cleaned_text_mupdf)


## Store chunks in MongoDB database

In [None]:
import pymongo

# Connect to MongoDB (Update the connection string as per your MongoDB setup)
client = pymongo.MongoClient(MONGODB_CONNECTION_STRING)
db = client[MONGODB_DATABASE]
collection = db[MONGODB_COLLECTION]

# Chunking the text by sentence to boost accurate retrieval
def chunk_by_sentence(text):
    sentences = []
    tmp_sentence = ""
    for char in text:
        if char in [".", "!", "?"]:
            sentences.append(tmp_sentence)
            tmp_sentence = ""
        else:
            tmp_sentence += char
    # Add any remaining text as the last sentence
    if tmp_sentence:
        sentences.append(tmp_sentence)
    return sentences
    

# Chunk the text
chunks = chunk_by_sentence(cleaned_text_mupdf)

# Store chunks in MongoDB
for idx, chunk in enumerate(chunks):
    # Create a document for each chunk
    document = {"_id": idx, "text": chunk}
    # Insert the document into the collection
    collection.insert_one(document)

print(f"Total chunks stored in MongoDB: {len(chunks)}")

## Implement RAG

### Create word embeddings

In [None]:
from sentence_transformers import SentenceTransformer
import pymongo

# Connect to MongoDB
client = pymongo.MongoClient(MONGODB_CONNECTION_STRING)
db = client[MONGODB_DATABASE]
chunks_collection = db[MONGODB_COLLECTION]

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to update documents with embeddings
def update_documents_with_embeddings():
    for document in chunks_collection.find():
        # Generate embedding
        embedding = model.encode(document['text'], convert_to_tensor=False)
        # Update document with embedding
        chunks_collection.update_one({'_id': document['_id']}, {'$set': {'embedding': embedding.tolist()}})

# Uncomment the following line to run the embedding update
update_documents_with_embeddings()


### Semantic search retrieval

In [10]:
from sentence_transformers import SentenceTransformer
import pymongo
import numpy as np
from scipy.spatial.distance import cosine

# Connect to MongoDB
client = pymongo.MongoClient(MONGODB_CONNECTION_STRING)
db = client[MONGODB_DATABASE]
chunks_collection = db[MONGODB_COLLECTION]

# Function to perform semantic search
def semantic_search(query, top_k=5):
    # Generate query embedding
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode(query, convert_to_tensor=False)
    
    # Retrieve all embeddings from MongoDB and calculate similarity
    similarities = []
    for document in chunks_collection.find():
        doc_embedding = np.array(document['embedding'])
        similarity = 1 - cosine(query_embedding, doc_embedding)  # Higher score means more similar
        similarities.append((document['_id'], similarity, document['text']))
    
    # Sort by similarity score in descending order
    similarities.sort(key=lambda x: x[1], reverse=True)
    
    # Return top_k most similar documents
    return similarities[:top_k]

# Example usage
query = "How to cure backpain?"
results = semantic_search(query)
for idx, (doc_id, similarity, text) in enumerate(results, start=1):
    print(f"Result {idx} (Score: {similarity:.3f}): {text[:100]}...")  # Print the first 100 characters for brevity



  from .autonotebook import tqdm as notebook_tqdm


Result 1 (Score: 0.653): Whatever the cause of your backache, the following natural Ayurvedic home remedies will be helpful....
Result 2 (Score: 0.645): Following these guidelines will help you heal your aching back as well as avoid backpain in the futu...
Result 3 (Score: 0.563): Backache can also be relieved by the use of the herb musta, which is a muscle painkiller....
Result 4 (Score: 0.529): Some gentle yoga exercises can help with back pain....
Result 5 (Score: 0.519): For extra healing and muscle relaxation, apply the mahanarayan oil on your back and then follow with...


## Connect LLM model

In [18]:
import replicate
from replicate.client import Client
import os

# Function to generate an answer using RAG enabled LLama2 from Replicate
def generate_RAG_answer(question, max_context_length=1000):
    # Assume semantic_search is defined and returns relevant context as a single string
    context_results = semantic_search(question, top_k=1)
    context = context_results[0][2]  # Get the text of the top result
    # Truncate context if it exceeds the maximum length
    if len(context) > max_context_length:
        context = context[:max_context_length]

    rag_prompt = f"[INST]\nQuestion: {question}\nContext: {context}\n[/INST]"
    print(rag_prompt)

    client = Client(api_token=API_TOKEN)

    # Generate the answer using LLama2 from Replicate
    output = client.run(
        "nwhitehead/llama2-7b-chat-gptq:8c1f632f7a9df740bfbe8f6b35e491ddfe5c43a79b43f062f719ccbe03772b52",
        input={
            "seed": -1,
            "top_k": 20,
            "top_p": 1,
            "prompt": rag_prompt,
            "max_tokens": 1024,
            "min_tokens": 1,
            "temperature": 0.5,
            "repetition_penalty": 1
        }
    )
    answer = ""
    for item in output:
        answer += item
        
    # Handle the case where the answer is empty
    if answer == "":
        answer = "Sorry, I don't have an answer for that."

    return answer

# Function to generate an answer using LLama2 from Replicate
def generate_non_RAG_answer(question):
    # Assume semantic_search is defined and returns relevant context as a single string
    non_rag_prompt = f"[INST]\nQuestion: {question}\n[/INST]"  # Fallback in case no context is found
    print(non_rag_prompt)

    client = Client(api_token=API_TOKEN)

    # Generate the answer using LLama2 from Replicate
    output = client.run(
        "nwhitehead/llama2-7b-chat-gptq:8c1f632f7a9df740bfbe8f6b35e491ddfe5c43a79b43f062f719ccbe03772b52",
        input={
            "seed": -1,
            "top_k": 20,
            "top_p": 1,
            "prompt": non_rag_prompt,
            "max_tokens": 1024,
            "min_tokens": 1,
            "temperature": 0.5,
            "repetition_penalty": 1
        }
    )
    answer = ""
    for item in output:
        answer += item
        
    # Handle the case where the answer is empty
    if not answer:
        answer = "Sorry, I don't have an answer for that."

    return answer

In [31]:
# Example query
query = "How to make agni tea?"
rag_answer = generate_RAG_answer(query)

print(rag_answer)

[INST]
Question: How to make agni tea?
Context: Or try this recipe for a tea to stimulate agni, the digestive fire: Agni Tea 1 quart water pinch cayenne pepper handful minced ginger root 2 tablespoons Sucanat or other sweetener to teaspoon rock salt Put all the above ingredients in a pot and boil for 20 minutes.
[/INST]
  Great! Here's a step-by-step guide on how to make Agni Tea:
Ingredients:
* 1 quart water
* Pinch of cayenne pepper
* Handful of minced ginger root
* 2 tablespoons Sucanat or other sweetener (optional)
* Rock salt (optional)

Instructions:

1. Start by bringing the water to a boil in a pot.
2. Once the water is boiling, add the pinch of cayenne pepper and stir well.
3. Next, add the handful of minced ginger root to the pot and stir again.
4. Add the 2 tablespoons of Sucanat or other sweetener (if using) and stir well.
5. If desired, add a small amount of rock salt to the pot and stir to combine.
6. Reduce the heat to a simmer and let the tea steep for 20 minutes.
7. St

In [32]:
# Example query
query = "How to make agni tea?"
non_rag_answer = generate_non_RAG_answer(query)

print(non_rag_answer)

[INST]
Question: How to make agni tea?
[/INST]
  Agni tea is a traditional Ayurvedic tea that is made by infusing herbs and spices in hot water. Here is a simple recipe for making agni tea at home:
Ingredients:

* 2 teaspoons of ginger powder
* 2 teaspoons of cinnamon powder
* 2 teaspoons of black pepper
* 1 teaspoon of turmeric powder
* 1 teaspoon of ginger oil
* 1 teaspoon of cinnamon oil
* 1 teaspoon of black pepper oil
* 1 teaspoon of honey (optional)

Instructions:

1. Boil water in a pot and let it cool to a comfortable temperature for drinking.
2. In a small bowl, mix together the ginger powder, cinnamon powder, black pepper, and turmeric powder.
3. Add the herbal mixture to the boiling water and stir well.
4. Reduce the heat to low and let the tea steep for 5-10 minutes, or until the desired flavor is achieved.
5. Strain the tea into a cup and discard the solids.
6. Add the ginger oil, cinnamon oil, and black pepper oil to the tea and stir well.
7. If desired, add honey to swee

## Evaluating RAG vs Non-RAG Approach

In order to quantify this, I will take the following steps:
1. Think of a query which has a well defined answer in the book.
2. Find the true answer to a query from the actual source (book pdf). 
3. Then pass the query through the RAG based LLM and the regular LLM.
4. Save both generated answers along with the true answer and find word embeddings for each. 
5. Finally, compare the word embeddings of the true answer with the RAG vs Non-RAG based LLM word embeddings using cosine similarity. 

Following these steps will help quantify the performace and accuracy of information in the two answers.

In [33]:
# Query from book "The Complete Book of Ayurvedic Home Remedies" by Vasant Lad
query = "How to make agni tea?"

# True Answer from the book.
true_answer = """1 quart water
1⁄8 pinch cayenne pepper
1⁄2 handful minced ginger root
2 tablespoons Sucanat or other sweetener 1⁄8 to 1⁄2 teaspoon rock salt
Put all the above ingredients in a pot and boil for 20 minutes.
Take the pot o􏰂 the burner, cool for a few minutes, then add the juice of half a lime. Do not boil the lime juice."""

### Implementing functions to evaluate performance. 

In [34]:
import torch

def get_embedding(text):
    """
    Generate an embedding for a given text.

    Args:
    - text (str): The input text.
    
    Returns:
    - The sentence embedding.
    """
    # Load the sentence transformer model
    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Generate the sentence embeddings
    embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings


def calculate_cosine_similarity(embedding1, embedding2):
    """
    Calculate the cosine similarity between two embeddings.

    Args:
    - embedding1 (torch.Tensor): The first embedding.
    - embedding2 (torch.Tensor): The second embedding.

    Returns:
    - The cosine similarity score.
    """
    # Calculate the cosine similarity
    if len(embedding1.shape) == 1:
        embedding1 = embedding1.unsqueeze(0)
    if len(embedding2.shape) == 1:
        embedding2 = embedding2.unsqueeze(0)
    similarity = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=1)
    return similarity.item()


def calculate_similarity_scores(true_answer, rag_answer, non_rag_answer):
    """
    Calculate the cosine similarity scores between the true answer and both RAG-based and non-RAG-based answers.

    Args:
    - true_answer (str): The true answer text.
    - rag_answer (str): The RAG-based model's answer text.
    - non_rag_answer (str): The non-RAG-based model's answer text.

    Returns:
    - A dictionary with cosine similarity scores.
    """
    # Convert the answers to embeddings
    true_answer_embedding = get_embedding(true_answer)
    rag_answer_embedding = get_embedding(rag_answer)
    non_rag_answer_embedding = get_embedding(non_rag_answer)
    
    # Calculate cosine similarity scores
    rag_similarity = calculate_cosine_similarity(true_answer_embedding, rag_answer_embedding)
    non_rag_similarity = calculate_cosine_similarity(true_answer_embedding, non_rag_answer_embedding)

    
    # Return the scores
    return {
        "RAG Similarity Score": rag_similarity,
        "Non-RAG Similarity Score": non_rag_similarity
    }


# Calculate the similarity scores
similarity_scores = calculate_similarity_scores(true_answer, rag_answer, non_rag_answer)
print(similarity_scores)


{'RAG Similarity Score': 0.6470919847488403, 'Non-RAG Similarity Score': 0.5113146305084229}
