In [None]:
# pip install -U sentence-transformers

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from IPython.display import  clear_output
import time
import PyPDF2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Run on the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Hugging Face Hub id of the chat model that writes the final answer.
DEFAULT_MODEL = "meta-llama/Llama-3.2-3B-Instruct"

# The tokenizer is loaded first; it is independent of the model weights.
tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL, use_safetensors=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token_id = tokenizer.eos_token_id

# Weights are loaded in bfloat16 from safetensors files and placed on `device`.
model_load_options = dict(
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    device_map=device,
)
model = AutoModelForCausalLM.from_pretrained(DEFAULT_MODEL, **model_load_options)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Open the source PDF and report how many pages it contains.
pdf_path = 'bhagavad-gita-in-english-source-file.pdf'
pdf_reader = PyPDF2.PdfReader(pdf_path)
num_pages = len(pdf_reader.pages)
print(f"Processing PDF with {num_pages} pages...")

# Extract text from page index 4 up to (but excluding) the last page.
# NOTE(review): the skipped pages are presumably front/back matter — confirm against the PDF.
page_texts = [
    pdf_reader.pages[page_index].extract_text()
    for page_index in range(4, num_pages - 1)
]

# One newline-separated string holding the text of every extracted page.
full_text = '\n'.join(page_texts)


Processing PDF with 53 pages...


In [4]:
# Encode the whole document once; the chunking cell slices this flat list of token ids.
encoded_document = tokenizer(full_text)
tokenized_text = encoded_document["input_ids"]

In [5]:
chunk_size = 100     # number of tokens in each chunk
chunk_overlap = 20   # number of tokens shared by two consecutive chunks


def split_into_token_windows(token_ids, size, overlap):
    """Split ``token_ids`` into consecutive windows that overlap by ``overlap`` tokens.

    Windows start every ``size - overlap`` tokens. Iteration stops at the first
    window that reaches the end of ``token_ids``: any later window would be a
    short fragment fully contained in that window's tail, i.e. a redundant
    chunk that only adds noise to retrieval.

    Args:
        token_ids: Sequence of token ids (anything sliceable with a length).
        size: Maximum number of tokens per window; must be positive.
        overlap: Tokens shared between neighbouring windows; ``0 <= overlap < size``.

    Returns:
        A list of slices of ``token_ids`` (empty when ``token_ids`` is empty).

    Raises:
        ValueError: If ``size`` or ``overlap`` is out of range.
    """
    if size <= 0:
        raise ValueError(f"chunk size must be positive, got {size}")
    if not 0 <= overlap < size:
        raise ValueError(
            f"chunk overlap must satisfy 0 <= overlap < size, got overlap={overlap}, size={size}"
        )

    stride = size - overlap
    total = len(token_ids)
    windows = []
    for start in range(0, total, stride):
        windows.append(token_ids[start:start + size])
        if start + size >= total:
            # This window already covers the end of the document.
            break
    return windows


# Decode every token window back to text; these strings are what gets embedded.
chunk_list = [
    tokenizer.decode(window, skip_special_tokens=True)
    for window in tqdm(split_into_token_windows(tokenized_text, chunk_size, chunk_overlap))
]



100%|██████████| 431/431 [00:00<00:00, 3368.96it/s]


# Creating the vector database

In [6]:
from sentence_transformers import SentenceTransformer
# 1. Load a pretrained Sentence Transformer model (used to embed both chunks and queries)
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [136]:
# 2. Calculate embeddings by calling model.encode(); one row per entry of chunk_list.
embeddings = embedding_model.encode(chunk_list, convert_to_tensor=True)
# Row i must describe chunk_list[i]; retrieval indexes chunk_list with row indices.
assert embeddings.shape[0] == len(chunk_list), "expected one embedding per chunk"
print(embeddings.shape)

# Persist the embedding matrix to disk.
torch.save(embeddings, "vector_embeddings.pt")

# The all-pairs chunk-vs-chunk similarity matrix is intentionally not computed here:
# nothing read it, and binding it to `similarities` overwrote the query-vs-chunk
# scores that the retrieval and context cells rely on whenever this cell was re-run.

torch.Size([431, 384])


In [135]:
# Display the chunk embedding matrix for a quick visual sanity check.
embeddings

tensor([[-0.0253, -0.0037, -0.1071,  ...,  0.0555, -0.0278, -0.0122],
        [-0.0423,  0.0094, -0.1253,  ...,  0.0205,  0.0118, -0.0231],
        [-0.0480,  0.0126, -0.1059,  ...,  0.0215, -0.0151,  0.0115],
        ...,
        [-0.0890,  0.0388, -0.0783,  ..., -0.1082, -0.0055,  0.0149],
        [ 0.0365,  0.0021, -0.0728,  ...,  0.0051, -0.0080,  0.0133],
        [ 0.0168, -0.0290, -0.0380,  ..., -0.0720, -0.0740, -0.0149]],
       device='cuda:0')

In [130]:
# The user's question. It is embedded for retrieval and also inserted into the
# user turn of the generation prompt.
query = "How do I deal with expectations in relationships?"

# --- Disabled experiment: let the LLM rephrase/expand the query before retrieval ---
# Everything below is commented out, so `query` is used exactly as written above.
# NOTE(review): the prompt asks for at least 100 words while max_new_tokens=50 caps the
# output far below that; reconcile the two before re-enabling this step.
# conversation = [{"role": "user", "content": f'''Rephrase the given question to make it more detailed, respectful, and profound. The question should be **at least 100 words long**. Do not change the meaning of the original question, only enhance its depth and clarity.  

# Do not use markdown formatting in your response.

# the question: {query} '''},]
# # 
# prompt = tokenizer.apply_chat_template(conversation, tokenize=False)
# inputs = tokenizer(prompt, return_tensors="pt").to(device)
# # print(prompt)

# with torch.no_grad():
#     output = model.generate(
#         **inputs,
#         do_sample=True,
#         max_new_tokens=50
#     )


# query = tokenizer.decode(output[0][len(inputs.input_ids[0]):], skip_special_tokens=True)

# print(query)

In [131]:

# Number of chunks to retrieve for the query.
TOP_K = 3

# Embed the query and score it against every chunk embedding.
query_encoded = embedding_model.encode([query], convert_to_tensor=True)
similarities = embedding_model.similarity(query_encoded, embeddings)

# torch.topk raises when k exceeds the number of candidates, so clamp k.
num_retrieved = min(TOP_K, similarities[0].numel())
scores, top_indices = torch.topk(similarities[0], k=num_retrieved)
# Historical name kept because the context-building cell reads `top_5_indices`;
# it holds `num_retrieved` indices, not necessarily five.
top_5_indices = top_indices
print(scores, top_5_indices)

# Step 6: Display results (the heading now reports the real number of hits).
print(f"Top {num_retrieved} similar chunks:")
for score, chunk_index in zip(scores.tolist(), top_5_indices.tolist()):
    print(f"Score: {score:.4f} | Chunk: {chunk_list[chunk_index]} \n")

tensor([0.2130, 0.1857, 0.1754], device='cuda:0') tensor([405, 282, 288], device='cuda:0')
Top 5 similar chunks:
Score: 0.2130 | Chunk:  all actions to Me, set 
Me as your supreme goal, and completely depend on Me. Always 
fix your mind on Me and resort to Karma -yoga. (18.57) When your 
mind becomes fixed on Me, you shal l overcome all difficulties by 
My grace. But, if you do not listen to Me due to ego, you shall per-
ish. (18.58)  
Karmic bondage and the free will  
46                           International Gita Society 

Score: 0.1857 | Chunk:  
working for Me as an instrument, just to ser ve and please Me, with-
out selfish motives. (12.10) If you are unable to work for Me, then 
just surrender unto My will and renounce the attachment to and 
anxiety for the fruits of all work with subdued mind — by learning 
to accept all results,  as God's grace. (12.11)  
Karma -yoga  is the best Path  
The knowledge of scriptures is better 

Score: 0.1754 | Chunk: , and full of devotion — th

In [132]:
# Build the context passage from the retrieved chunks, keeping only those whose
# similarity to the query is strictly positive, in retrieval order.
context_chunks = []
for chunk_index in top_5_indices:
    if similarities[0][chunk_index] > 0:
        context_chunks.append(chunk_list[chunk_index])

CONTEXT_TEXT = '\n'.join(context_chunks)

In [133]:

# Chat messages: the system turn carries the instructions plus the retrieved
# context, the user turn carries the question and the output constraints.
conversation = [
    {"role": "system", "content": f'''You are a compassionate guide. You answer questions based only on the given context of Gita and do not add any extra information. Your goal is to first interprete the writtings of gita in the given context and then try your best to relate to user's query.  Answer in simple english that a 15 year old can undrestand using your interpretation of the given text. \n CONTEXT : {CONTEXT_TEXT} '''},
    {"role": "user", "content": f'''{query}. Answer should not be longer than 100 words. Do not use markdown format'''},
]

# add_generation_prompt=True appends the assistant header to the prompt, so the
# model starts writing the answer immediately instead of generating the header
# itself (which previously had to be sliced off with a hard-coded "+3").
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,
    add_generation_prompt=True,
)
# The rendered template already contains the special tokens (including BOS);
# add_special_tokens=False stops the tokenizer from adding a second BOS.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(device)

# Sampling is enabled, so the answer differs between runs unless a seed is set.
with torch.no_grad():
    output = model.generate(
        **inputs,
        do_sample=True,
        max_new_tokens=128,
        # Passed explicitly to avoid the "Setting `pad_token_id`..." warning.
        pad_token_id=tokenizer.pad_token_id,
    )

# generate() returns prompt + completion; keep only the newly generated tokens.
prompt_length = inputs.input_ids.shape[1]
processed_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print(processed_text)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


According to the Bhagavad Gita, when you fix your mind on God as your ultimate goal, you'll overcome difficulties. In relationships, this means letting go of expectations from others. Don't try to please others to gain something in return, but do it as an instrument of God's will. Surrender to God's will and accept the results with a subdued mind. This way, you'll be free from attachment and anxiety about the fruits of your actions. Remember, relationships are a means to serve and please God, not to fulfill your own expectations.


In [119]:
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf

# Create the Kokoro text-to-speech pipeline used by the synthesis cell below.
# NOTE(review): the synthesis cell requests voice 'hm_psi'; confirm that pairing
# this voice with lang_code='a' is intentional, per the reminder on this line.
tts_pipeline = KPipeline(lang_code='a') # <= make sure lang_code matches voice



  WeightNorm.apply(module, name, dim)


In [134]:
# Run text-to-speech on the generated answer; the pipeline yields one result per segment.
generator = tts_pipeline(processed_text,voice='hm_psi')

# Each result unpacks into three fields; only the third (the audio) is kept.
audios = [segment_audio for _, _, segment_audio in generator]

# Join the per-segment waveforms end to end into a single clip.
AUDIO = torch.cat(audios, dim=0)

# Render an inline audio player at a 24 kHz sample rate.
display(Audio(data=AUDIO, rate=24000))