In [None]:
from llama_index import SimpleDirectoryReader

# Load the single handbook PDF through SimpleDirectoryReader into `documents`.
handbook_pdf = "data/Crayon Kvalitetssystem 02 Personalhåndbok.pdf"
pdf_reader = SimpleDirectoryReader(input_files=[handbook_pdf])
documents = pdf_reader.load_data()

In [None]:
from llama_index import Document

# Merge the text of every loaded document into one Document, separating
# consecutive parts with a blank line.
merged_text = "\n\n".join(part.text for part in documents)
document = Document(text=merged_text)

In [None]:
from llama_index.node_parser import SentenceWindowNodeParser

# Create a sentence-window node parser with window_size=3, using "window" as
# the window metadata key and "original_text" as the original-text metadata key.
# NOTE(review): `node_parser` is rebound to a SemanticSplitterNodeParser in a
# later cell before it is passed to build_index, so on a top-to-bottom run this
# instance appears to be unused — confirm whether this cell is still needed.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Embedding model, selected by name through HuggingFaceEmbedding; it is handed
# to the semantic splitter node parser in a later cell.
# NOTE(review): the model name ends in "-en" while the source PDF filename looks
# Norwegian — confirm this model is suitable for the document's language.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

In [None]:
from llama_index.node_parser import SemanticSplitterNodeParser

# Create the semantic splitter node parser, using the embedding model defined
# above and buffer_size=10. This rebinds `node_parser`, replacing any parser
# assigned to that name by an earlier cell.
# NOTE(review): buffer_size=10 is presumably a tuned, non-default value (the
# recorder's app_id later also mentions "10") — confirm against the
# SemanticSplitterNodeParser documentation.
node_parser = SemanticSplitterNodeParser.from_defaults(
    buffer_size=10,
    embed_model=embed_model,
)

In [None]:
from utils import build_index, get_llm_model
from config import settings
# Obtain the LLM from the project helper, then build the index from the merged
# document with the current `node_parser`. `save_dir` is forwarded to
# build_index (presumably the persistence directory — confirm in utils).
llm = get_llm_model(settings, "an assistant")
index = build_index(
    document,
    node_parser,
    llm=llm,
    save_dir="semantic_splitter_index",
)

In [None]:
# Build different indexes

In [None]:
import os
from config import settings
# Export the Azure OpenAI values from `settings` as process environment
# variables in a single update call.
os.environ.update(
    {
        "AZURE_OPENAI_API_KEY": settings.AZURE_OPENAI_API_KEY,
        "AZURE_OPENAI_ENDPOINT": settings.AZURE_OPENAI_API_ENDPOINT,
        "OPENAI_API_VERSION": settings.AZURE_OPENAI_API_VERSION,
    }
)

In [None]:
from utils import get_sentence_window_query_engine
from utils_trulens import get_prebuilt_trulens_recorder

In [None]:
# Wrap the index built above in a query engine using the project helper.
# NOTE(review): the helper is named for sentence-window retrieval, but `index`
# was built with the semantic splitter parser — confirm the helper does not rely
# on sentence-window metadata that these nodes may not carry.
semanticparser_query_engine = get_sentence_window_query_engine(
    index
)

# Create the TruLens recorder for this query engine; `app_id` is the label
# passed for this run (the "10" presumably mirrors buffer_size=10 — keep in sync).
recorder_1 = get_prebuilt_trulens_recorder(
    semanticparser_query_engine,
    app_id='Semantic parser 10'
)

In [None]:
# Load the evaluation questions: one question per line of the text file.
# Surrounding whitespace (including the trailing newline) is stripped, and
# lines that are empty after stripping are skipped so that no empty query is
# later sent to the query engine.
# NOTE(review): the file is opened with the platform default encoding — confirm
# it matches how the questions file was written if it contains non-ASCII text.
eval_questions = []
with open('generated_questions.text', 'r') as file:
    for line in file:
        question = line.strip()
        if question:
            eval_questions.append(question)

In [None]:
from trulens_eval import Tru

def run_evals(eval_questions, tru_recorder, query_engine):
    """Run every evaluation question through the query engine under the recorder.

    Args:
        eval_questions: Iterable of questions to send to the engine.
        tru_recorder: Context manager that is entered and exited once per
            question, around the query call.
        query_engine: Object exposing ``query(question)``.

    Returns:
        None. The value returned by each query is not kept.
    """
    for prompt in eval_questions:
        # One recorder context per question, so each query runs inside its own
        # enter/exit pair.
        with tru_recorder:
            query_engine.query(prompt)

In [None]:
# Send every loaded evaluation question through the semantic-parser query
# engine, with each query executed inside the recorder_1 context.
run_evals(eval_questions, recorder_1, semanticparser_query_engine)

In [None]:
# Start the trulens_eval dashboard (presumably to inspect the recorded
# evaluation runs — confirm against the TruLens documentation).
Tru().run_dashboard()