In [1]:

from openai import OpenAI
import voyageai as vo  
import os
from dotenv import load_dotenv
load_dotenv()

# Model identifiers referenced by later cells.
embedding_model = "voyage-context-3"
chat_model = "gpt-3.5-turbo"

# API clients. Keys are read from the environment (load_dotenv() was called
# just above); a missing variable yields None rather than raising here.
client_vo = vo.Client(api_key=os.environ.get("VOYAGE_API_KEY"))
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


In [2]:
# Retrieval hyper-parameters; all three are forwarded to response_llm below.

# Passed as `k` (presumably the number of chunks retrieved per question).
k = 3

# Hybrid-search weight: 1.0 = purely semantic, 0.0 = purely keyword search.
a = 0.6

# Diversity weight. Per the original author's note: 1.0 = all diverse,
# 0.0 = chunks are the same.
# NOTE(review): in the usual MMR formulation lambda = 1.0 means pure relevance
# and lambda = 0.0 means maximum diversity, i.e. the opposite direction.
# Confirm against the implementation in `rag` before tuning this value.
lambda_parameter = 0.7

In [3]:
import os
from ingestion import load_documents
# Absolute path of the parent of the current working directory.
# NOTE(review): this depends on where the kernel was started, not on where the
# notebook file lives -- confirm the two coincide in your setup.
base_path = os.path.abspath(os.pardir)

# Each loaded entry is later indexed with the keys "text" and "file".
md_texts = load_documents(base_path)

In [4]:
from langchain_text_splitters import CharacterTextSplitter

# Token-based splitter. Sizes are counted in cl100k_base tokens; the 1500-token
# chunk size follows https://arxiv.org/pdf/2407.01219, and the 150-token overlap
# preserves context across chunk boundaries.
character_text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=1500,
    chunk_overlap=150,
)

# Split every document and tag each resulting chunk with its source file name
# by handing the metadata to the splitter directly.
all_chunks = []
for doc in md_texts:
    all_chunks.extend(
        character_text_splitter.create_documents(
            [doc["text"]],
            metadatas=[{"source": doc["file"]}],
        )
    )

print(len(all_chunks), "total chunks created")
#print(all_chunks[0].page_content)

37 total chunks created


In [5]:
import numpy as np

# Raw text of every chunk, in the same order as all_chunks. This order also
# defines the row order of chunk_embeddings.
chunk_texts = [c.page_content for c in all_chunks]

# NOTE(review): every chunk is sent as its own single-element inner list. As
# far as the Voyage docs describe it, an inner list is meant to hold the ordered
# chunks of ONE document so they are embedded with shared context; with
# single-element lists the model likely behaves like a context-agnostic
# embedder. Grouping chunks per source document may improve retrieval, but is
# subject to the API's per-document token limits -- confirm before changing.
resp = client_vo.contextualized_embed(
    inputs=[[text] for text in chunk_texts],
    model=embedding_model,
    input_type="document",
)

# Fail loudly if the API did not return exactly one result per chunk;
# otherwise rows would silently stop lining up with chunk_texts.
assert len(resp.results) == len(chunk_texts), (
    f"expected {len(chunk_texts)} embedding results, got {len(resp.results)}"
)

# One inner list per chunk -> one result per chunk, each with a single vector.
chunk_embeddings = np.array(
    [r.embeddings[0] for r in resp.results],
    dtype=np.float32,
)

# Scale every row to unit L2 norm (presumably so that downstream dot products
# act as cosine similarity -- confirm in `rag`). A zero-norm row is left as
# zeros instead of being turned into NaN by a division by zero.
row_norms = np.linalg.norm(chunk_embeddings, axis=1, keepdims=True)
row_norms[row_norms == 0] = 1.0
chunk_embeddings = chunk_embeddings / row_norms


In [6]:
from rag import response_llm, embed_query
import json

# Load the evaluation set. The encoding is explicit so the file is decoded the
# same way regardless of the platform's default locale encoding.
with open("qa_long_dataset.json", "r", encoding="utf-8") as f:
    qa_data = json.load(f)

# Only the question text is sent through the RAG pipeline.
questions = [q["question"] for q in qa_data]

# Run the pipeline for every question. Each returned item is later read via
# its "answer" and "context_used" keys.
llm_results = response_llm(
    questions=questions,
    client=client,
    chat_model=chat_model,
    chunk_embeddings=chunk_embeddings,
    chunk_texts=chunk_texts,
    embed_query=embed_query,
    k=k,
    alpha=a,
    lambda_param=lambda_parameter,
)




Q1: Write a 500 word report on what are the best practices for developing and implementing reproductive and fertility health policies in the workplace to ensure inclusivity and support for all employees?
Answer: Developing and implementing reproductive and fertility health policies in the workplace to ensure inclusivity and support for all employees requires a comprehensive approach that considers the diverse needs and challenges individuals may face. By following best practices, organizations can create a supportive and inclusive environment that promotes employee well-being and productivity.

1. Inclusivity and Accessibility:
Ensure that the policies are inclusive of all genders, sexual orientations, and family structures. Address cultural and religious considerations around reproductive health to accommodate diverse beliefs and practices. Offer the policies in multiple formats (digital, print, audio) to enhance accessibility and ensure that all employees can easily access the infor

In [7]:
from datasets import Dataset
from langchain_openai import ChatOpenAI
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)

# qa_data was already loaded from qa_long_dataset.json when the questions were
# built, so it is reused here instead of reading the file a second time.

# zip() stops at the shorter input, so a count mismatch would silently drop
# samples from the evaluation. Refuse to continue in that case.
if len(qa_data) != len(llm_results):
    raise ValueError(
        f"{len(qa_data)} reference items but {len(llm_results)} LLM results; "
        "cannot pair them for evaluation"
    )

# Pair each reference item with the prediction at the same position and build
# one evaluation row per pair.
# NOTE(review): "contexts" wraps pred["context_used"] in a one-element list.
# If context_used is already a list of chunks this nests it, and if it is one
# joined string the context metrics see a single context -- confirm against
# what response_llm returns.
ragas_data = [
    {
        "question": ref["question"],
        "answer": pred["answer"],
        "contexts": [pred["context_used"]],
        "ground_truth": ref["ground_truth_answer"],
    }
    for ref, pred in zip(qa_data, llm_results)
]
ragas_dataset = Dataset.from_list(ragas_data)

# Judge model for the metrics; temperature 0 for repeatable scoring.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

evaluation_scores = evaluate(
    ragas_dataset,
    metrics=[answer_relevancy, context_precision, context_recall, faithfulness],
    llm=llm,
)

print(evaluation_scores)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Evaluating:   0%|          | 0/16 [00:00<?, ?it/s]

LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.


{'answer_relevancy': 0.9158, 'context_precision': 1.0000, 'context_recall': 1.0000, 'faithfulness': 0.9728}
