In [1]:
import os
from dotenv import load_dotenv

# Locate and load the project's .env file, then report what was found without
# echoing any secret material into the notebook output.

# A notebook has no __file__. The previous abspath('__file__') call used a quoted
# literal, i.e. a plain file name resolved against the working directory, so its
# dirname was always the working directory. Say so explicitly.
current_dir = os.getcwd()

# Get the parent directory (RAG_Techniques)
parent_dir = os.path.dirname(current_dir)

# Construct the path to the .env file in the parent directory
dotenv_path = os.path.join(parent_dir, '.env')
dotenv_exists = os.path.isfile(dotenv_path)

# Load the .env file (a missing file is not an error; the key may already be
# present in the process environment)
load_dotenv(dotenv_path)

# Now you can use os.getenv to get your environment variables
api_key = os.getenv('OPENAI_API_KEY')

# Report only whether the key is present: notebook outputs are saved and shared,
# so not even a prefix of the key is printed.
if api_key:
    print("API key loaded successfully.")
else:
    print("Failed to load API key from environment.")

print(f"Current working directory: {os.getcwd()}")
print(f"Parent directory (where .env should be): {parent_dir}")
print(f".env file path: {dotenv_path}")
print(f".env file exists: {dotenv_exists}")

# If the .env file exists, show which variables it defines. Every value is
# redacted, not just OPENAI_API_KEY, because the file may hold other secrets.
if dotenv_exists:
    with open(dotenv_path, 'r', encoding='utf-8') as file:
        contents = file.read()
    print("\nContents of .env file (values redacted):")
    for line in contents.split('\n'):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            # Blank lines and comments carry no values
            print(line)
        elif '=' in stripped:
            variable_name = stripped.split('=', 1)[0].strip()
            print(f"{variable_name}=[REDACTED]")
        else:
            # Could be the continuation of a multi-line value; do not echo it
            print('[REDACTED]')

API key loaded successfully. First 5 characters: sk-pr
Current working directory: /Users/lasyaedunuri/Documents/ApplOfLLMs/t-c
Parent directory (where .env should be): /Users/lasyaedunuri/Documents/ApplOfLLMs
.env file path: /Users/lasyaedunuri/Documents/ApplOfLLMs/.env
.env file exists: False


In [2]:
import os
import sys
from dotenv import load_dotenv

# Add the parent directory to the path since we work with notebooks.
# Guarded so that re-running the cell does not append duplicate entries.
parent_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_path not in sys.path:
    sys.path.append(parent_path)

# NOTE(review): star imports hide where names come from. Later cells call
# retrieve_context_per_question, show_context and evaluate_rag, which are
# presumably provided by these two modules - consider importing them explicitly.
from helper_functions import *
from evaluate_rag import *

# Load environment variables from a .env file BEFORE checking for the key;
# the check previously ran first and could report a stale result.
load_dotenv()

# Check if the API key is loaded correctly. Only presence is reported:
# notebook outputs are saved and shared, so no part of the key is printed.
api_key = os.getenv('OPENAI_API_KEY')
if api_key:
    print("API key loaded successfully")
else:
    # The old `os.environ[...] = os.getenv(...)` line raised an opaque TypeError
    # here when the key was unset; fail with an actionable message instead.
    raise RuntimeError(
        "Failed to load API key: OPENAI_API_KEY is not set in the environment or in a .env file"
    )

# Set the OpenAI API key environment variable (already the case when the key was
# found above; kept so the requirement stays explicit for downstream libraries)
os.environ["OPENAI_API_KEY"] = api_key


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from helper_functions import *


API key loaded successfully
API key: sk-pr...o16sA


In [4]:

# Directory handed to encode_documents below. It is a relative path, so it is
# resolved against the kernel's current working directory.
path = "data/"

In [None]:
import os
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

def encode_documents(path, chunk_size=1000, chunk_overlap=200):
    """
    Encodes all text files into a vector store using OpenAI embeddings.

    Only regular files directly inside ``path`` whose names end in ``.txt`` are
    read (sub-directories are not traversed). Files are processed in sorted name
    order so that repeated runs produce the chunks in the same order.

    Args:
        path: The path to the directory of text files.
        chunk_size: The desired size of each text chunk.
        chunk_overlap: The amount of overlap between consecutive chunks.

    Returns:
        A FAISS vector store containing the encoded content of the files.

    Raises:
        ValueError: If ``path`` contains no ``.txt`` files, or the files yield
            no text chunks (for example, they are all empty).
    """
    # os.listdir gives entries in arbitrary order, so sort for reproducibility.
    # The isfile check skips a directory that happens to be named "*.txt".
    txt_filenames = sorted(
        name
        for name in os.listdir(path)
        if name.endswith('.txt') and os.path.isfile(os.path.join(path, name))
    )
    if not txt_filenames:
        raise ValueError(f"No .txt files found in directory: {path!r}")

    # Initialize text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len
    )

    # Collect the chunks of every file into one flat list
    all_texts = []
    for filename in txt_filenames:
        file_path = os.path.join(path, filename)

        # Load text document
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Split text into chunks and add to all_texts
        all_texts.extend(text_splitter.split_text(text))

    # Fail early with a clear message: handing an empty list to
    # FAISS.from_texts is expected to fail with a much less helpful error.
    if not all_texts:
        raise ValueError(f"The .txt files in {path!r} produced no text chunks")

    # Build the embedding client only once there is something to embed
    embeddings = OpenAIEmbeddings()

    # Create vector store from all texts
    vectorstore = FAISS.from_texts(all_texts, embeddings)

    return vectorstore

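## Alternative (disabled) version of encode_documents: takes a list of folders and records file_name/folder metadata for each chunk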
# def encode_documents(folders, chunk_size=1000, chunk_overlap=200):
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len
#     )
#     embeddings = OpenAIEmbeddings()
#     all_texts = []
#     metadata = []

#     for folder in folders:
#         for filename in os.listdir(folder):
#             if filename.endswith(".txt"):
#                 file_path = os.path.join(folder, filename)
#                 with open(file_path, "r", encoding="utf-8") as file:
#                     text = file.read()
#                 texts = text_splitter.split_text(text)
#                 all_texts.extend(texts)
#                 metadata.extend([{"file_name": filename, "folder": folder}] * len(texts))
    
#     vectorstore = FAISS.from_texts(all_texts, embeddings, metadatas=metadata)
#     return vectorstore

In [6]:
# Build the vector store from the .txt files under `path`. The chunk_size and
# chunk_overlap values repeat encode_documents' defaults and are spelled out
# here so they are easy to tune.
chunks_vector_store = encode_documents(path, chunk_size=1000, chunk_overlap=200)
print("FAISS vector store created successfully!")

  embeddings = OpenAIEmbeddings()


FAISS vector store created successfully!


In [7]:
# Expose the vector store through the retriever interface used by the cells below.
# NOTE(review): k=2 presumably limits each query to the 2 best-matching chunks -
# confirm against the LangChain retriever documentation.
chunks_query_retriever = chunks_vector_store.as_retriever(search_kwargs={"k": 2})

In [8]:
# Smoke-test the retriever with a single question and display what comes back.
# retrieve_context_per_question and show_context are not defined in this notebook;
# presumably they come from the star imports (helper_functions / evaluate_rag) -
# confirm there.
test_query = "What rights do users grant Amazon when posting reviews or comments?"
context = retrieve_context_per_question(test_query, chunks_query_retriever)
show_context(context)

  docs = chunks_query_retriever.get_relevant_documents(question)


Context 1:
If you do post content or submit material, and unless we indicate otherwise, you grant Amazon a nonexclusive, royalty-free, perpetual, irrevocable, and fully sublicensable right to use, reproduce, modify, adapt, publish, perform, translate, create derivative works from, distribute, and display such content throughout the world in any media. You grant Amazon and sublicensees the right to use the name that you submit in connection with such content, if they choose. You represent and warrant that you own or otherwise control all of the rights to the content that you post; that the content is accurate; that use of the content you supply does not violate this policy and will not cause injury to any person or entity; and that you will indemnify Amazon for all claims resulting from content you supply. Amazon has the right but not the obligation to monitor and edit or remove any activity or content. Amazon takes no responsibility and assumes no liability for any content posted by yo

In [9]:
# Run the evaluation against the retriever. evaluate_rag is not defined in this
# notebook; presumably it comes from the star import of the evaluate_rag module.
# NOTE(review): the recorded run logged repeated "OpenAI rate limit exceeded"
# retries, so expect this cell to be slow and to consume API quota.
evaluate_rag(chunks_query_retriever)

Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
Answering the question from the retrieved context...
LLMTestCase(input="Does METTLER TOLEDO's waive any provision?", actual_output='The context does not explicitly mention any waiver of provisions by Mettler-Toledo. It outlines the governing law and jurisdiction for disputes but does not indicate any waiver of rights or provisions. Here are the key points:\n\n- The contract is governed by the laws of the country where the Mettler-Toledo office on the order documents is located.\n- If the office is in the USA, Delaware

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 10 test case(s) in parallel: |█         | 10% (1/10) [Time Taken: 00:15, 15.32s/test case]ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
Evaluating 10 test case(s) in parallel: |██        | 20% (2/10) [Time Taken: 00:28, 14.02s/test case]ERROR:root:OpenAI rate limit exceeded. Retrying: 2 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 1 time(s)...
Evaluating 10 test case(s) in parallel: |███       | 30% (3/10) [Time Taken: 00:33, 10.04s/test case]ERROR:root:OpenAI rate limit exceeded. Retrying: 2 time(s)...
ERROR:root:OpenAI rate limit exceeded. Retrying: 2 time(s)...
ERROR:root:Open



Metrics Summary

  - ❌ Correctness (GEval) (score: 0.25719053320936747, threshold: 0.5, strict: False, evaluation model: gpt-4o, reason: The actual output does not confirm or refute the expected obligation of Audi regarding replacement equipment, lacking direct relevance to the expected statement., error: None)
  - ✅ Faithfulness (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: None, error: None)
  - ❌ Answer Relevancy (score: 0.6666666666666666, threshold: 0.7, strict: False, evaluation model: gpt-4o, reason: The score is 0.67 because the response partially addresses the question but includes irrelevant information about myAudi and Audi Connect services, which does not directly relate to handling obsolete equipment., error: None)

For test case:

  - input: How does Audi handle situations where a technological change renders certain service-related equipment obsolete?
  - actual output: The context does not provide specific details on how Audi handles sit