In [1]:
!pip install -q langchain openai chromadb langchain-community langchain-openai tiktoken

from google.colab import userdata
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 1. Setting up OpenAI API Key:
# The key is fetched through google.colab's `userdata` helper under the secret
# name "OpenAI-key" rather than being hard-coded in the notebook. It is passed
# explicitly to the OpenAIEmbeddings and OpenAI constructors further down.
OPENAI_API_KEY = userdata.get("OpenAI-key")

# 2. Loading Text Documents:
# On a load failure `documents` falls back to an empty list so the remaining
# steps can still run (or skip themselves) instead of the cell crashing here.
text_file_path = "text-example.txt"
try:
    loader = TextLoader(text_file_path)
    documents = loader.load()
except (FileNotFoundError, RuntimeError) as exc:
    # NOTE(review): LangChain's TextLoader appears to re-raise I/O failures
    # (a missing file included) as RuntimeError with the original exception
    # chained as __cause__, so catching FileNotFoundError alone would let the
    # error escape. Both forms are handled here.
    file_missing = isinstance(exc, FileNotFoundError) or isinstance(
        exc.__cause__, FileNotFoundError
    )
    if file_missing:
        print(f"Error: Text file '{text_file_path}' not found.")
    else:
        print(f"Error: Could not load text file '{text_file_path}': {exc}")
    documents = []
else:
    # Report success only once loading has actually completed.
    print(f"Successfully loaded {len(documents)} document(s) from {text_file_path}")

# 3. Splitting Documents:
# Break the loaded documents into chunks of at most 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)
chunk_total = len(texts)
print(f"Split text into {chunk_total} chunks.")

# 4. Creating Embeddings:
# Build the embedding client with the key from step 1. `embeddings` is set to
# None on failure so that step 5 can skip itself.
try:
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
except Exception as e:
    print(f"Error creating embeddings: {e}. Please ensure your OpenAI API key is correctly set.")
    embeddings = None

# 5. Creating Vector Store:
# `db` is the Chroma store on success and None otherwise; later steps test it
# against None to decide whether to run.
if embeddings is None:
    db = None
    print("Vector store creation skipped due to embedding error.")
elif not texts:
    # Nothing to index (e.g. the source file failed to load). Chroma is
    # expected to reject an empty batch, so skip instead of crashing.
    db = None
    print("Vector store creation skipped: no document chunks to index.")
else:
    try:
        # NOTE(review): the embedding requests are presumably issued here,
        # while the chunks are indexed, so key/network problems surface at
        # this call rather than in step 4 -- hence the guard.
        db = Chroma.from_documents(texts, embeddings)
        print("Chroma vector store created.")
    except Exception as e:
        print(f"Error creating vector store: {e}. Please ensure your OpenAI API key is correctly set.")
        db = None

# 6. Creating Retrieval QA Chain:
# Wire an OpenAI LLM to the vector store's retriever using the "stuff" chain
# type. `qa_chain` is None whenever the chain could not be built.
#
# The store is compared against None explicitly: a plain truthiness test on
# the Chroma wrapper may consult its length (TODO confirm for the installed
# version), which would report an empty-but-valid store as an error.
if db is not None:
    try:
        qa_chain = RetrievalQA.from_chain_type(
            llm=OpenAI(openai_api_key=OPENAI_API_KEY),
            chain_type="stuff",
            retriever=db.as_retriever(),
        )
        print("RetrievalQA chain created.")
    except Exception as e:
        print(f"Error creating RetrievalQA chain: {e}")
        qa_chain = None
else:
    qa_chain = None
    print("RetrievalQA chain creation skipped due to vector store error.")

# 7. Query the Chain:
# The question is defined up front because step 8 reuses it even when the QA
# chain is unavailable. Assigning it only inside the `qa_chain` branch raised
# NameError in step 8 whenever chain creation failed but the store existed.
query = "What is this document about?"

if qa_chain is not None:
    # The chain is run through `invoke`; as the printed output shows, the
    # result is a dict carrying both the query and the generated answer.
    result = qa_chain.invoke(query)
    print("\nQuery Result:")
    print(result)
else:
    print("\nQuery skipped due to RetrievalQA chain error.")

# 8. Use Metadata:
# Run the same question directly against the retriever and show the metadata
# of the top-ranked chunk (for TextLoader documents this is the source path).
if db is not None:
    retriever = db.as_retriever()
    relevant_documents = retriever.invoke(query)
    if relevant_documents:
        print("\nRelevant Document Metadata:")
        print(relevant_documents[0].metadata)
    else:
        print("\nNo relevant documents found.")
else:
    print("\nMetadata retrieval skipped due to vector store error.")

Successfully loaded 1 document(s) from text-example.txt
Split text into 3 chunks.
Chroma vector store created.
RetrievalQA chain created.





Query Result:
{'query': 'What is this document about?', 'result': ' The document is about the landscape of Artificial Intelligence (AI), with a focus on Large Language Models (LLMs) and AI agents. It discusses the limitations of LLMs and the role of AI agents in addressing those limitations. It also mentions the use of LangChain, an open-source framework, in building LLM-powered applications and the potential applications of AI agents in various domains. '}





Relevant Document Metadata:
{'source': 'text-example.txt'}
