In [1]:
from projectgurukul import scriptures, corelib, prompt_templates
from llama_index import StorageContext, load_index_from_storage

from llama_index.llms import ChatMessage, MessageRole
from llama_index.prompts import ChatPromptTemplate
from llama_index.schema import MetadataMode
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response.notebook_utils import display_response
# apply nested async to run in a notebook
import nest_asyncio
import llama_index
from llama_index import ServiceContext
from llama_index.llms import OpenAI
import pandas as pd
import numpy as np
from llama_index import QueryBundle
import re
import json
from tqdm import tqdm
# Register tqdm's pandas integration so DataFrames gain `progress_apply`
# (used by the commented-out batch-processing cells further down).
tqdm.pandas()


# Allow nested event loops so async library calls can run inside the notebook kernel.
nest_asyncio.apply()

# Reload prompt_templates so edits made to that module on disk take effect
# without restarting the kernel.
import importlib
importlib.reload(prompt_templates)

<module 'projectgurukul.prompt_templates' from '/Users/krishansubudhi/repos/projectgurukul/projectgurukul/prompt_templates.py'>

In [2]:
# Load the training split, drop columns that are entirely empty, and replace
# remaining missing cells with empty strings so text columns are always `str`.
data = (
    pd.read_csv("~/Downloads/Gurukul Data - data_train.csv")
    .dropna(how="all", axis=1)
    .fillna("")
)
data.head()

Unnamed: 0,ID,Split,Scripture,Category,Question,Question Context
0,1,Train,Ramayana,From Context,Why does Vasishta refuse to give Sabala to Vis...,Context 1\nहैरण्यानां रथानां च श्वेताश्वानां च...
1,2,Train,Ramayana,From Context,"Did Hanuman's devotion to Rama ever waver, eve...",
2,3,Train,Ramayana,From Context,"What drove Ravana to kidnap Sita, and was it s...",
3,4,Train,Ramayana,From Context,Describe Bharata's character and his approach ...,Context 1\nश्वस्तु गन्तासि तं देशं वसाद्य सह म...
4,5,Train,Ramayana,From Context,How did Ravana react upon learning about Dhumr...,Context 1\n[Ravana comes to know that Rama and...


In [3]:
# Shared retriever over the Gita and Ramayana indexes stored under ../data/.
retriever = corelib.get_fusion_retriever(
    ['gita', 'ramayana'],
    is_offline=False,
    data_dir="../data/",
)

# Default engine: library-default LLM settings with the project's custom QA template.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    text_qa_template=prompt_templates.custom_text_qa_template,
)

Using openAI models


In [4]:
# --- LLM configurations -------------------------------------------------------
# GPT-4 preview model.
gpt4_service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-4-1106-preview"),
)

# Fine-tuned GPT-3.5 model (OpenAI "ft:" model id).
trained_model_service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="ft:gpt-3.5-turbo-1106:macro-mate::8jTl73oZ"),
)

# --- Query engines ------------------------------------------------------------
# Both engines reuse the same `retriever`; they differ in LLM and QA template.
query_engine_gpt_4 = RetrieverQueryEngine.from_args(
    retriever,
    service_context=gpt4_service_context,
    text_qa_template=prompt_templates.custom_text_qa_template,
)

# Note: the fine-tuned model is paired with `training_text_qa_template`,
# not the custom template used by the other engines.
query_engine_trained_model = RetrieverQueryEngine.from_args(
    retriever,
    service_context=trained_model_service_context,
    text_qa_template=prompt_templates.training_text_qa_template,
)



In [5]:
# import logging
# import sys
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [None]:
# Ad-hoc check of the fine-tuned engine through its full `query` path
# (the engine was built from `retriever`, so contexts are fetched here rather than supplied).
response = query_engine_trained_model.query("Why bibhishana betray ravana?")
print(response)

In [None]:
# NOTE(review): presumably a false-premise probe to see how the fine-tuned model
# handles a question the scriptures do not support — confirm intent.
print(query_engine_trained_model.query("Why Lakshmana betray ravana?"))

In [5]:
def get_prompt(query_str, context_str):
    """Render the custom QA chat template for one question.

    The template's messages are filled with ``query_str`` and ``context_str``
    and then converted with ``to_openai_message_dicts``; the converted value
    is returned.
    """
    chat_messages = prompt_templates.custom_text_qa_template.format_messages(
        context_str=context_str,
        query_str=query_str,
    )
    return llama_index.llms.openai_utils.to_openai_message_dicts(chat_messages)

def get_prompt_str(query_str, context_str):
    """Return the custom QA template formatted with the given question and context."""
    return prompt_templates.custom_text_qa_template.format(
        context_str=context_str,
        query_str=query_str,
    )

def get_contexts(context_str):
    """Split a labelled context blob into a list of context strings.

    Every occurrence of ``Context <number>`` is removed (wherever it appears,
    not only on its own line), the remaining text is split on ``---``, and
    each piece is stripped of surrounding whitespace. Empty pieces are kept,
    so an empty input yields ``[""]``.
    """
    unlabelled = re.sub(r'Context \d+', '', context_str)
    return list(map(str.strip, unlabelled.split('---')))

def get_response(query, contexts):
    """Synthesize an answer to ``query`` from the supplied ``contexts``.

    Calls the response synthesizer of the module-level ``query_engine``
    directly, so the given contexts are used instead of running retrieval.
    """
    synthesizer = query_engine._response_synthesizer
    return synthesizer.get_response(query, contexts)

def get_response_from_row(row):
    """Answer a row's ``Question`` using its JSON-encoded ``used_context`` list."""
    return get_response(row.Question, json.loads(row.used_context))

def get_gpt4_response(query, contexts):
    """Synthesize an answer to ``query`` from ``contexts`` with the GPT-4 engine.

    Uses the response synthesizer of ``query_engine_gpt_4`` directly, so the
    given contexts are used instead of running retrieval.
    """
    synthesizer = query_engine_gpt_4._response_synthesizer
    return synthesizer.get_response(query, contexts)

def get_trained_model_response(query, contexts):
    """Synthesize an answer to ``query`` from ``contexts`` with the fine-tuned engine.

    Uses the response synthesizer of ``query_engine_trained_model`` directly,
    so the given contexts are used instead of running retrieval.
    """
    synthesizer = query_engine_trained_model._response_synthesizer
    return synthesizer.get_response(query, contexts)

def process_row(row):
    """Attach the contexts and rendered prompts for one dataset row.

    Parameters
    ----------
    row : pandas.Series
        Expected to provide ``Question``, ``Category`` and ``"Question Context"``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with three added entries: ``used_context`` (JSON list
        of context strings), ``prompt_str`` and ``prompt_openai`` (JSON).
        Processing is best-effort: if any step raises, a warning naming the
        question and the error is emitted and the copy is returned with
        whatever entries were added before the failure.
    """
    import warnings  # local import: keeps the cell runnable without touching the import cell

    # Explicit copy so the caller's row is never modified.
    new_row = pd.Series(row, copy=True)
    try:
        query_str = row.Question
        # NOTE(review): supplied "Question Context" is only used for the
        # "Outside Context" category; every other category is re-retrieved even
        # when the row carries its own context — confirm this is intended.
        if row.Category == "Outside Context":
            contexts = get_contexts(row["Question Context"])
        else:
            contexts = [
                result.get_content(metadata_mode=MetadataMode.LLM)
                for result in retriever.retrieve(query_str)
            ]
        context_str = "\n\n".join(contexts)
        # ensure_ascii=False keeps Devanagari text readable in the saved CSV.
        new_row["used_context"] = json.dumps(contexts, ensure_ascii=False)
        new_row["prompt_str"] = get_prompt_str(query_str, context_str)
        new_row["prompt_openai"] = json.dumps(get_prompt(query_str, context_str), ensure_ascii=False)
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C still stops a long
        # batch, and report the failure instead of hiding it.
        warnings.warn(
            f"process_row failed for question {row.get('Question')!r}: {exc!r}"
        )
    return new_row


In [6]:
# processed = data.progress_apply(process_row, axis = 1)
# processed.to_csv("data_inferred.csv", index=False, encoding = 'utf-8')
# processed

In [None]:
# Load the labelled dataset; later cells read its `Question` and `used_context` columns.
processed_data = pd.read_csv("~/Downloads/Gurukul Data - data_labelled.csv")
# gpt3_responses = processed_data.iloc[10:].progress_apply(get_response_from_row, axis = 1)
# gpt3_responses.to_frame(name='gpt3_responses').to_csv("gpt3_responses.csv",encoding = 'utf-8')

In [13]:
# Spot-check the question text stored at positional row 53.
processed_data.iloc[53].Question

"Was Rama's decision to send Sita to the forest justified, considering the rumors about her purity?"

In [15]:
def process_for_response_gpt4(row):
    """Answer a row's ``Question`` with GPT-4 using its JSON-encoded ``used_context`` list."""
    return get_gpt4_response(row.Question, json.loads(row.used_context))

def process_for_response_trained_model(row):
    """Answer a row's ``Question`` with the fine-tuned model using its JSON-encoded ``used_context`` list."""
    return get_trained_model_response(row.Question, json.loads(row.used_context))

# Try one labelled example (positional row 54) against the fine-tuned model.
# Swap in the commented line below to get the GPT-4 answer for the same row.
row = processed_data.iloc[54]
# response = process_for_response_gpt4(row)
response = process_for_response_trained_model(row)
print(response)

Rama and Sugriva, along with their army of Vanaras, crossed the ocean to reach Lanka by building a bridge with the help of the Vanara army.

In the Yuddha Kanda, Sarga 22 of the Valmiki Ramayana, it is described how the Vanaras, under the leadership of Nala, constructed the bridge:

> तानि कोटि सहस्राणि वानराणां महौजसाम् ।  
> बध्नन्तः सागरे सेतुं जुग्मुः पारं महोदधेः ।। 6.22.77 ।।  
> "The glorious Vanaras in thousands of crores reached the other side of the sea from the bridge constructed in the great sea."

The bridge was described as:

> विशालः सुकृतश्रशीमान् सुभूमिः सुसमाहितः ।  
> अशोभत महान् सेतुः सीमन्त इव सागरे ।। 6.22.78 ।।  
> "That huge bridge was well built, magnificent, evenly built, and extensive and looked charming like the partition in a woman's hair."

After the bridge was completed, Rama, Lakshmana, and the Vanaras, including Sugriva, crossed the ocean:

> अग्रतः तस्य सैन्यस्य श्रीमान् रामः स लक्ष्मणः ।  
> जगाम धन्वी धर्मात्मा सुग्रीवेण समन्वितः ।। 6.22.82 ।।  
> "A