In [None]:
# Install the packages this notebook uses. %pip (rather than !pip) installs into
# the environment of the running kernel.
# NOTE(review): versions are unpinned, so a fresh install may pull releases that
# behave differently — consider pinning known-good versions.
%pip install langchain langchain-community openai faiss-cpu langchain-openai

In [25]:
import os
from getpass import getpass
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI


In [13]:
# Step 1: Retrieve the OpenAI API key securely
# Prefer the OPENAI_API_KEY environment variable so the notebook can run
# unattended (Restart Kernel -> Run All, nbconvert, CI). Fall back to a hidden
# interactive prompt when the variable is unset or empty. The key is never
# hardcoded or echoed.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

In [20]:
# Step 2: Load the text file
file_path = "../0-Data/paul_graham_short.txt"  # Replace with your text file path
# Decode explicitly as UTF-8. Without an encoding the loader falls back to the
# platform default (e.g. cp1252 on Windows), which can raise UnicodeDecodeError
# or silently garble non-ASCII characters.
loader = TextLoader(file_path, encoding="utf-8")
# `documents` is indexable; the next cell inspects documents[0].
documents = loader.load()

In [None]:
# Sanity check: display the first loaded document.
first_document = documents[0]
print(first_document)

In [22]:
# Step 3: Break the loaded documents into overlapping chunks
# Splitter settings are gathered in one place so they are easy to tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = CharacterTextSplitter(**splitter_options)
split_docs = text_splitter.split_documents(documents)


In [None]:
# Step 4: Embed the chunks and index them in a FAISS vector store
# The embedding client authenticates with the key collected in Step 1.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
vector_store = FAISS.from_documents(
    documents=split_docs,
    embedding=embeddings,
)


In [None]:
# Step 5: Wire the chat model and the retriever into a RetrievalQA chain
# Chat model that generates the answers.
llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=openai_api_key)
# Retriever view over the FAISS store built in Step 4.
retriever = vector_store.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,  # ask the chain to return the retrieved documents too
)

In [None]:
# Step 6: Ask questions
# Interactive loop: read a question, run it through the QA chain, print the
# answer. Typing 'exit' (any case, surrounding whitespace ignored), closing
# stdin, or interrupting the kernel ends the session without a traceback.
while True:
    try:
        query = input("\nEnter your question (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting a blocked input() is the usual way to stop a notebook
        # cell; treat it as a normal exit.
        print("Exiting...")
        break
    if not query:
        # Don't spend an API call on a blank question; just prompt again.
        continue
    if query.lower() == "exit":
        print("Exiting...")
        break
    result = qa_chain.invoke({"query": query})
    print("\nAnswer:")
    print(result["result"])