In [1]:
# Install required libraries
!pip install -q langchain openai chromadb langchain-community langchain-openai tiktoken

# Upload the text file from your local machine
from google.colab import files
uploaded = files.upload()  # Manually select your 'text-example.txt' file when prompted

# Set up OpenAI API key securely
from google.colab import userdata
OPENAI_API_KEY = userdata.get("OpenAI-key")

# Import LangChain modules
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Confirm uploaded file exists
import os
file_name = "text-example.txt"

# Load the document
loader = TextLoader(file_name)
documents = loader.load()

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Create embeddings
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Create vector store
db = Chroma.from_documents(texts, embeddings)

# Create QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=OPENAI_API_KEY),
    chain_type="stuff",
    retriever=db.as_retriever()
)

# Query the document
query = "What is this document about?"
result = qa_chain.invoke(query)
print("\nAnswer:", result)

Saving text-example.txt to text-example (4).txt

Answer: {'query': 'What is this document about?', 'result': '\nThe document is about the role of artificial intelligence in modern education.'}
