In [13]:
import os
import openai
from dotenv import load_dotenv
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core import load_index_from_storage
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.indices.postprocessor import SentenceTransformerRerank

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Hand the key to the openai client; the value is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Read the PDF, then join the text of every loaded Document into one Document.
documents = SimpleDirectoryReader(
    input_files=["introduction-to-natural-language-processing.pdf"],
).load_data()
document = Document(
    text="\n\n".join(doc.text for doc in documents),
)

def get_sentence_window_index(documents, index_dir, sentence_window_size=3):
    """Build a sentence-window vector index, or load it from ``index_dir``.

    Args:
        documents: A single ``Document`` or an iterable of ``Document`` objects.
            Their texts are joined with blank lines into one ``Document``
            before indexing.
        index_dir: Directory the index is persisted to. If it already exists,
            the index stored there is loaded as-is and nothing is rebuilt, so
            delete the directory to rebuild with different settings.
        sentence_window_size: Passed as ``window_size`` to
            ``SentenceWindowNodeParser``. Only used when a new index is built.

    Returns:
        The freshly built or loaded index.

    Note:
        Every call reassigns ``Settings.llm``, ``Settings.embed_model`` and
        ``Settings.node_parser``, which are global llama_index settings.
    """
    # Import under an alias so this cannot be confused with other classes named
    # `OpenAI` (e.g. the trulens feedback provider) that may be bound in the
    # notebook namespace when this function is called.
    from llama_index.llms.openai import OpenAI as LlamaIndexOpenAI

    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_sentence",
    )

    Settings.llm = LlamaIndexOpenAI()
    Settings.embed_model = "local:BAAI/bge-small-en-v1.5"
    Settings.node_parser = node_parser

    if os.path.exists(index_dir):
        storage_context = StorageContext.from_defaults(persist_dir=index_dir)
        return load_index_from_storage(storage_context)

    # Index the documents that were passed in rather than a notebook global.
    if isinstance(documents, Document):
        documents = [documents]
    merged_document = Document(text="\n\n".join(doc.text for doc in documents))

    sentence_index = VectorStoreIndex.from_documents([merged_document])
    sentence_index.storage_context.persist(persist_dir=index_dir)
    return sentence_index

def get_sentence_window_engine(sentence_index, similarity_top_k=6, rerank_top_n=2):
    """Create a query engine over a sentence-window index.

    Args:
        sentence_index: Index to query; only ``as_query_engine`` is called on it.
        similarity_top_k: Number of nodes to retrieve before post-processing.
        rerank_top_n: ``top_n`` given to the ``BAAI/bge-reranker-base`` reranker.

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    # Post-processors run in list order: the "window" metadata replacement
    # first, then the reranker.
    window_replacer = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[window_replacer, reranker],
    )


In [2]:
# Build the sentence-window index requested with window size 1 (or load it from
# ./sentence_index_1 when that directory already exists), then wrap it in a
# query engine.
# NOTE: an existing directory is loaded as-is; delete it to force a rebuild.
index_dir = "./sentence_index_1"
sw_index_1 = get_sentence_window_index(documents, index_dir, sentence_window_size=1)
sw_engine_1 = get_sentence_window_engine(sw_index_1)

In [10]:
from trulens_eval import Tru
# Aliased so it does not rebind the name `OpenAI`, which the notebook already
# uses for the llama_index LLM class.
from trulens_eval.feedback.provider import OpenAI as TruOpenAI
from trulens_eval import Feedback
from trulens_eval.app import App
import numpy as np

tru = Tru()

# Initialize the feedback provider used by the feedback functions.
provider = TruOpenAI()

# Select context to be used in feedback. The location of context is app specific.

def get_evaluation_response(rag_engine, app_id, eval_questions):
    """Run every evaluation question through ``rag_engine`` under a recorder.

    Three feedback functions are attached to the recorder: groundedness,
    answer relevance and context relevance, all backed by the module-level
    ``provider``.

    Args:
        rag_engine: Query engine to evaluate; ``rag_engine.query(question)`` is
            called once per question.
        app_id: Identifier the recorder is registered under.
        eval_questions: Iterable of question strings.

    Returns:
        A list with one record per question, in the order the questions were
        asked. The list is empty when ``eval_questions`` is empty.
    """
    from trulens_eval import TruLlama

    # Select context to be used in feedback. The location of context is app specific.
    context = App.select_context(rag_engine)

    # Groundedness of the output against the collected context chunks.
    f_groundedness = (
        Feedback(provider.groundedness_measure_with_cot_reasons)
        .on(context.collect())  # Collect context chunks into a list
        .on_output()
    )

    # Question/answer relevance between overall question and answer.
    f_answer_relevance = (
        Feedback(provider.relevance)
        .on_input_output()
    )

    # Question/statement relevance between question and each context chunk,
    # averaged over the chunks.
    f_context_relevance = (
        Feedback(provider.context_relevance_with_cot_reasons)
        .on_input()
        .on(context)
        .aggregate(np.mean)
    )

    # Initialize the recorder
    tru_query_engine_recorder = TruLlama(
        rag_engine,
        app_id=app_id,
        feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
    )

    # Each `with` block opens a fresh recording holding exactly one query, so
    # the record is read right after the block instead of once after the loop
    # (which would keep only the last question's record).
    records = []
    for question in eval_questions:
        with tru_query_engine_recorder as recording:
            rag_engine.query(question)
        records.append(recording.get())

    return records


In [None]:
# Questions put to the engine under evaluation.
eval_questions = [
    "Who introduced the notions of finite-state machines and context-free grammar (CFG) to linguistics?",
    "How did linguists test formal rules of grammar according to Chomsky’s approach?",
    "What has contributed to making the vision of computers understanding ordinary language and holding conversations with humans more plausible in the 21st century?",
    "Why is it often necessary to assign a part of speech (POS) to a word based on its function in context rather than its inherent meaning?"
]

# Run the questions through the first sentence-window engine; results are
# recorded under the app id 'sentence window engine 1'.
records = get_evaluation_response(
    sw_engine_1,
    app_id='sentence window engine 1',
    eval_questions = eval_questions
)

# Show what the evaluation returned, then start the TruLens dashboard.
display(records)
tru.run_dashboard()

## window size 3

In [8]:
# Build the sentence-window index requested with window size 3 (or load it from
# ./sentence_index_2 when that directory already exists), then wrap it in a
# query engine.
# NOTE: an existing directory is loaded as-is; delete it to force a rebuild.
index_dir = "./sentence_index_2"
sw_index_2 = get_sentence_window_index(documents, index_dir, sentence_window_size=3)
sw_engine_2 = get_sentence_window_engine(sw_index_2)

In [None]:
# Score the second engine on the `eval_questions` list defined in the earlier
# evaluation cell, so both engines are compared on exactly the same questions
# and the two lists cannot drift apart.
records = get_evaluation_response(
    sw_engine_2,
    app_id='sentence window engine 2',
    eval_questions=eval_questions,
)

# Show what the evaluation returned, then start the TruLens dashboard.
display(records)
tru.run_dashboard()

In [12]:
# NOTE(review): this cell imports from `trulens.core`, whereas the evaluation
# setup in this notebook imports from `trulens_eval` — confirm both packages
# are installed and that they share the same database.
from trulens.core import TruSession

session = TruSession()
# Presumably wipes the stored apps and records — destructive; run deliberately.
session.reset_database()

Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]
