### Simple RAG Implementation

This implementation provides a simple retrieval-augmented generation (RAG) system with the following features:

- Document loading and processing (plain-text `.txt` files from a directory)
- Text splitting into manageable chunks
- Vector embeddings using OpenAI's embedding model
- Vector storage using Chroma
- Question-answering using LangChain's RetrievalQA chain

In [None]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from dotenv import load_dotenv

# Load environment variables at import time, before any SimpleRAG instance
# (and therefore any OpenAI client object) is created.
# NOTE(review): presumably this picks up OPENAI_API_KEY from a local .env
# file — confirm against the deployment setup.
load_dotenv()

class SimpleRAG:
    """Minimal retrieval-augmented question answering over ``.txt`` files.

    The pipeline is: load text files -> split them into chunks -> embed the
    chunks into a Chroma vector store -> answer questions with a
    ``RetrievalQA`` chain. The chain is built lazily on the first
    :meth:`query` call and then reused, so the documents are embedded once
    per instance instead of once per question.
    """

    def __init__(self, documents_path, persist_directory="./chroma_db"):
        """Initialize the RAG system.

        Args:
            documents_path: Directory that contains the ``.txt`` documents.
            persist_directory: Directory handed to Chroma as its
                ``persist_directory``. Defaults to ``"./chroma_db"``.
        """
        self.documents_path = documents_path
        self.persist_directory = persist_directory
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = CharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        # Built on first use by query() and reused afterwards.
        self._qa_chain = None

    def load_documents(self):
        """Load every ``.txt`` file in ``documents_path`` and split it.

        Files are visited in sorted name order so the resulting chunk order
        is reproducible; ``os.listdir`` makes no ordering promise.

        Returns:
            The chunks produced by ``self.text_splitter.split_documents``.

        Raises:
            OSError: Propagated from ``os.listdir`` when ``documents_path``
                cannot be listed (for example, it does not exist).
        """
        documents = []
        for filename in sorted(os.listdir(self.documents_path)):
            file_path = os.path.join(self.documents_path, filename)
            # Skip sub-directories that merely happen to be named "*.txt".
            if filename.endswith(".txt") and os.path.isfile(file_path):
                documents.extend(TextLoader(file_path).load())

        # Split documents into chunks
        return self.text_splitter.split_documents(documents)

    def create_vector_store(self, texts):
        """Embed ``texts`` and store them in a Chroma vector store.

        Args:
            texts: Non-empty list of document chunks, as returned by
                :meth:`load_documents`.

        Returns:
            The vector store created by ``Chroma.from_documents``.

        Raises:
            ValueError: If ``texts`` is empty. Failing here gives a clear
                message instead of an error from deep inside the embedding
                or storage layer.
        """
        if not texts:
            raise ValueError(
                "Cannot build a vector store from an empty list of "
                "document chunks"
            )
        # NOTE(review): from_documents appears to add to whatever is already
        # stored under persist_directory, so repeated builds against the same
        # directory likely accumulate duplicate chunks — confirm and clear or
        # reuse the directory if that matters.
        return Chroma.from_documents(
            documents=texts,
            embedding=self.embeddings,
            persist_directory=self.persist_directory,
        )

    def setup_qa_chain(self, vectorstore):
        """Set up the question-answering chain on top of ``vectorstore``.

        Args:
            vectorstore: Object exposing ``as_retriever()``, e.g. the result
                of :meth:`create_vector_store`.

        Returns:
            A ``RetrievalQA`` chain of type ``"stuff"`` that also returns the
            source documents it used.
        """
        return RetrievalQA.from_chain_type(
            llm=OpenAI(),
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
            return_source_documents=True,
        )

    def query(self, question, *, refresh=False):
        """Answer ``question`` using the indexed documents.

        The first call loads, splits and embeds the documents and builds the
        QA chain; later calls reuse that chain. Rebuilding on every question
        would re-embed the whole corpus each time and write the same chunks
        to the persist directory again.

        Args:
            question: The natural-language question to ask.
            refresh: When true, rebuild the index and chain from the current
                contents of ``documents_path`` before answering.

        Returns:
            A dict with the keys ``"answer"`` and ``"source_documents"``.

        Raises:
            ValueError: If the index has to be built and no document chunks
                were found.
        """
        if refresh or self._qa_chain is None:
            texts = self.load_documents()
            vectorstore = self.create_vector_store(texts)
            self._qa_chain = self.setup_qa_chain(vectorstore)

        result = self._qa_chain({"query": question})
        return {
            "answer": result["result"],
            "source_documents": result["source_documents"],
        }

# Example usage
if __name__ == "__main__":
    # Prerequisites: OPENAI_API_KEY must be set in the environment, and the
    # directory below must exist and hold some text documents.
    documents_dir = "./documents"
    user_question = "What are the key points in the document?"

    # Build the RAG system and run a single question through it
    answer_payload = SimpleRAG(documents_dir).query(user_question)

    # Report the answer followed by the file each supporting chunk came from
    print("Answer:", answer_payload["answer"])
    print("\nSources:")
    for source_doc in answer_payload["source_documents"]:
        print(f"- {source_doc.metadata['source']}")