In [None]:
!pip install -q langchain openai chromadb langchain-community langchain-openai tiktoken

from google.colab import userdata
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# 1. Setting up OpenAI API Key:
# The key is read from Colab's user secrets (secret name "OpenAI-key") rather
# than being hard-coded in the notebook.
OPENAI_API_KEY = userdata.get("OpenAI-key")

# 2. Loading Text Documents:
# `documents` is always bound after this step (an empty list on failure) so
# the later steps can run without hitting a NameError.
text_file_path = "text-example.txt"
try:
    loader = TextLoader(text_file_path)
    documents = loader.load()
except FileNotFoundError:
    print(f"Error: Text file '{text_file_path}' not found.")
    documents = []
except RuntimeError as load_error:
    # TextLoader can re-raise file-open/decoding failures as RuntimeError with
    # the original exception attached as __cause__, so a missing file may
    # arrive here instead of as a bare FileNotFoundError.
    if isinstance(load_error.__cause__, FileNotFoundError):
        print(f"Error: Text file '{text_file_path}' not found.")
    else:
        print(f"Error: Could not load '{text_file_path}': {load_error.__cause__ or load_error}")
    documents = []
else:
    # Only report success when loading actually completed.
    print(f"Successfully loaded {len(documents)} document(s) from {text_file_path}")

# 3. Splitting Documents:
# Break the loaded documents into overlapping chunks: chunk_size caps the
# length of each chunk and chunk_overlap is how much neighbouring chunks share,
# so context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)
print(f"Split text into {len(texts)} chunks.")

# 4. Creating Embeddings:
# Build the embeddings client. A broad catch is deliberate here: this is the
# notebook's top-level boundary, and the error is reported rather than hidden.
try:
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
except Exception as e:
    print(f"Error creating embeddings: {e}. Please ensure your OpenAI API key is correctly set.")
    embeddings = None

# 5. Creating Vector Store:
# `db` stays None unless the store is built successfully, which is what the
# similarity-search step checks for.
db = None
if embeddings is None:
    print("Vector store creation skipped due to embedding error.")
elif not texts:
    # Nothing was loaded/split, so there is nothing to index.
    print("Vector store creation skipped: no text chunks to index.")
else:
    # Constructing the embeddings client may not validate the API key; failures
    # such as an invalid key or a network error can surface only when the
    # chunks are actually embedded, so this call is guarded as well.
    try:
        db = Chroma.from_documents(texts, embeddings)
    except Exception as e:
        print(f"Error creating Chroma vector store: {e}")
    else:
        print("Chroma vector store created.")

# 6. Performing Similarity Search:
# Compare against None explicitly instead of relying on the store object's
# truthiness: "no store was created" is the only condition that should skip
# the search, regardless of how the store class evaluates in a boolean context.
if db is not None:
    query = "What is this document about?"
    # Returns the stored chunks most similar to the query.
    results = db.similarity_search(query)
    print("\nSimilarity Search Results:")
    print(results)
else:
    print("\nSimilarity search skipped due to vector store error.")

Successfully loaded 1 document(s) from text-example.txt
Split text into 3 chunks.
Chroma vector store created.





Similarity Search Results:
[Document(metadata={'source': 'text-example.txt'}, page_content='The landscape of Artificial Intelligence (AI) is rapidly evolving, with Large Language Models (LLMs) and intelligent agents at the forefront of innovation. LLMs, like GPT-3 and Llama 2, have demonstrated remarkable capabilities in natural language processing, text generation, and even code synthesis. However, their reliance on static training data poses limitations for applications requiring real-time information or dynamic interaction with external systems. To address these limitations, AI agents are emerging as a crucial component in the AI ecosystem. Agents are designed to interact with their environment, perceive changes, and take actions to achieve specific goals. They often leverage LLMs as their core reasoning engine, but extend their functionality through tools and integrations with external APIs and databases. LangChain, a popular open-source framework, plays a pivotal role in the deve