In [None]:
import os
from dotenv import load_dotenv

# Locate the .env file in the parent of the notebook's directory and load it.
#
# `__file__` is not defined inside a notebook, and the previous
# os.path.abspath('__file__') only resolved to the working directory by
# accident (it treated the literal string as a file name). Use the working
# directory explicitly instead.
current_dir = os.getcwd()

# Get the parent directory (RAG_Techniques)
parent_dir = os.path.dirname(current_dir)

# Construct the path to the .env file in the parent directory
dotenv_path = os.path.join(parent_dir, '.env')

# Load the .env file
load_dotenv(dotenv_path)

# Now you can use os.getenv to get your environment variables
api_key = os.getenv('OPENAI_API_KEY')

# Report only whether the key is present. Never echo any part of the secret:
# cell output is saved with the notebook and is easily committed or shared.
if api_key:
    print("API key loaded successfully.")
else:
    print("Failed to load API key from environment.")

print(f"Current working directory: {os.getcwd()}")
print(f"Parent directory (where .env should be): {parent_dir}")
print(f".env file path: {dotenv_path}")
print(f".env file exists: {os.path.exists(dotenv_path)}")

# If the .env file exists, show its structure with every value redacted.
# Matching only lines that start with 'OPENAI_API_KEY=' would leak other
# secrets, 'export KEY=...' lines and 'KEY = value' lines.
if os.path.exists(dotenv_path):
    with open(dotenv_path, 'r', encoding='utf-8') as file:
        contents = file.read()

    print("\nContents of .env file (API key redacted):")
    for line in contents.split('\n'):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            # Blank lines and comments carry no values; show them verbatim.
            print(line)
        elif '=' in stripped:
            # Keep the variable name, hide whatever follows the first '='.
            name = stripped.split('=', 1)[0].strip()
            print(f'{name}=[REDACTED]')
        else:
            # Anything else (e.g. the continuation of a multi-line quoted
            # value) may be secret material, so hide the whole line.
            print('[REDACTED]')

In [None]:
import os
import sys
from dotenv import load_dotenv


sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Add the parent directory to the path since we work with notebooks
from helper_functions import *
from evaluation.evalute_rag import *

# Load environment variables from a .env file BEFORE reading the key, so the
# check below reflects what load_dotenv() actually provided.
load_dotenv()

# Check if the API key is loaded correctly
api_key = os.getenv('OPENAI_API_KEY')
if api_key:
    # Report presence only. Printing characters of the key would persist part
    # of the secret in the notebook's saved output.
    print("API key loaded successfully")

    # Set the OpenAI API key environment variable. Guarded because assigning
    # None to os.environ raises TypeError when the key is missing.
    os.environ["OPENAI_API_KEY"] = api_key
else:
    print("Failed to load API key")

### Read Docs

In [None]:
# Text document to index. The path is relative, so it is resolved against the
# current working directory when the file is opened.
path = "../data/Hire_Vue_terms.txt"

### Encode Document

In [None]:
def encode_text_file(path, chunk_size=1000, chunk_overlap=200):
    """
    Encodes a text file into a vector store using OpenAI embeddings.

    The file is read as UTF-8, split into overlapping chunks, and each chunk
    is embedded and stored in a FAISS index.

    Args:
        path: The path to the text file.
        chunk_size: The desired size of each text chunk, measured in
            characters (the splitter uses ``len`` as its length function).
        chunk_overlap: The amount of overlap between consecutive chunks.

    Returns:
        A FAISS vector store containing the encoded file content.

    Raises:
        ValueError: If the file is empty or contains only whitespace, since
            there is nothing to embed.
    """

    # Load text document
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Fail early with a clear message instead of letting an empty document
    # surface as an obscure error further down in the embedding/indexing calls.
    if not text.strip():
        raise ValueError(
            f"No text to encode: '{path}' is empty or contains only whitespace."
        )

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len
    )
    texts = text_splitter.split_text(text)

    # Create embeddings and vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts, embeddings)

    return vectorstore

In [None]:
# Build the vector store from the document at `path`, requesting a chunk size
# of 1000 with an overlap of 200 between consecutive chunks.
chunks_vector_store = encode_text_file(path, chunk_size=1000, chunk_overlap=200)

### Create Retriever

In [None]:
# Wrap the vector store in a retriever. search_kwargs passes k=2, presumably
# the number of chunks returned per query; confirm against the vector
# store's as_retriever documentation.
chunks_query_retriever = chunks_vector_store.as_retriever(search_kwargs={"k": 2})

### Test Retriever

In [None]:
# Smoke-test the retriever with a sample question and display what comes back.
# NOTE(review): retrieve_context_per_question and show_context are not defined
# in this notebook; presumably they arrive via the star import of
# helper_functions. Verify.
test_query = "What are the key points in the terms of service agreement?"
context = retrieve_context_per_question(test_query, chunks_query_retriever)
show_context(context)

### Evaluate Results

In [None]:
# Run the evaluation routine against the retriever built above.
# NOTE(review): evaluate_rag is not defined in this notebook; presumably it
# comes from the star import of evaluation.evalute_rag. Verify.
evaluate_rag(chunks_query_retriever)