In [1]:
#!pip install langchain
#!pip install langchain-community
#!pip install faiss-cpu
#!pip install openai
#!pip install sentence-transformers
#!pip install tf-keras

In [None]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# 1. Load the 'lex-h*.txt' files from the given folder
def load_documents(folder_path="C:/Users/Steffen/Dropbox"):
    """Load every ``lex-h*.txt`` file found directly in ``folder_path``.

    Args:
        folder_path: Directory to scan (not recursive). Only regular files
            whose names start with ``lex-h`` and end with ``.txt`` are read.

    Returns:
        A flat list containing whatever ``TextLoader.load()`` returns for each
        matching file, in alphabetical file-name order.

    Raises:
        OSError: If ``folder_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    docs = []
    # os.listdir returns names in arbitrary order; sorting keeps the document
    # (and therefore chunk) order stable from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        if not (file_name.startswith("lex-h") and file_name.endswith(".txt")):
            continue
        path = os.path.join(folder_path, file_name)
        if not os.path.isfile(path):
            # A directory that happens to match the pattern cannot be read as text.
            continue
        loader = TextLoader(path, encoding='utf-8')
        docs.extend(loader.load())
    return docs

# 2. Split documents into chunks
def split_documents(documents, chunk_size=240, chunk_overlap=30):
    """Split documents into overlapping chunks ready for embedding.

    Args:
        documents: The documents to split.
        chunk_size: Maximum chunk size passed to the splitter (default 240).
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter (default 30).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# 3. Create vector DB with local HuggingFace embeddings
def create_vectorstore(chunks, model_name="all-MiniLM-L6-v2"):
    """Embed the chunks with a HuggingFace model and index them in FAISS.

    Args:
        chunks: Non-empty list of document chunks to index.
        model_name: Embedding model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        The vector store built by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if not chunks:
        # Nothing to embed: fail here with a clear message instead of
        # somewhere inside the index construction.
        raise ValueError("Cannot build a vector index from an empty list of chunks.")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(chunks, embeddings)

# 4. Create a RAG chain using OpenAI GPT
def create_qa_chain(vectorstore):
    """Build a retrieval QA chain backed by ``vectorstore`` and an OpenAI chat model.

    Args:
        vectorstore: Object exposing ``as_retriever()``; its retriever feeds
            the chain.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``.
    """
    doc_retriever = vectorstore.as_retriever()
    # The OpenAI API key must already be set when the chat model is created.
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0)
    chain = RetrievalQA.from_chain_type(retriever=doc_retriever, llm=chat_model)
    return chain

# 5. Main loop
def _ensure_openai_key(key_path="C:/Users/Steffen/Desktop/openai_key.txt"):
    """Make sure ``OPENAI_API_KEY`` is set, preferring the key stored in ``key_path``.

    If the key file is missing, an ``OPENAI_API_KEY`` already present in the
    environment is kept as-is.

    Raises:
        FileNotFoundError: If the key file is missing and no key is set in the
            environment.
        ValueError: If the key file exists but contains only whitespace.
    """
    if os.path.isfile(key_path):
        with open(key_path, "r") as f:
            key = f.read().strip()
        if not key:
            raise ValueError(f"OpenAI key file is empty: {key_path}")
        os.environ["OPENAI_API_KEY"] = key
    elif not os.environ.get("OPENAI_API_KEY"):
        raise FileNotFoundError(
            f"No OpenAI key found: {key_path} does not exist and "
            "OPENAI_API_KEY is not set."
        )


def main():
    """Index the local documents and answer questions about them interactively.

    Loads and chunks the documents, builds the vector index, creates the QA
    chain, then reads questions from stdin until the user types 'exit' or
    'quit' (or input ends). Returns early if no documents were loaded.
    """
    # Resolve the API key first so a missing key is reported before the
    # embedding/indexing work starts rather than after it.
    _ensure_openai_key()

    print("Loading documents...")
    docs = load_documents()
    print(f"Loaded {len(docs)} documents.")
    if not docs:
        print("No matching documents found; nothing to index.")
        return

    print("Splitting into chunks...")
    chunks = split_documents(docs)
    print(f"{len(chunks)} chunks created.")

    print("Creating vector index...")
    vectorstore = create_vectorstore(chunks)

    print("Initializing question-answering chain...")
    qa = create_qa_chain(vectorstore)

    print("\nReady! Ask questions about your documents.")
    while True:
        try:
            query = input("\nYour question ('exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt ends the session
            # without a traceback.
            print()
            break
        if not query:
            # Do not send blank questions to the model.
            continue
        if query.lower() in ("exit", "quit"):
            break
        answer = qa.run(query)
        print("\nAnswer:", answer)
    
# Entry point: start the interactive session when this code is executed
# directly (as a script, or as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main()

Loading documents...
Loaded 37 documents.
Splitting into chunks...
28262 chunks created.
Creating vector index...


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")





KeyboardInterrupt: 