Source: https://lintang-gilang01.medium.com/simple-rag-using-python-langchain-openai-dan-chroma-baf9a87a056e

Install Libraries

In [None]:
%pip install langchain langchain-core langchain-community langchain-ollama langchain_anthropic langchain-chroma langchain-experimental langchain-text-splitters langgraph

Import Libraries

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter      # To split long text into smaller sections based on specific characters
# from langchain.chat_models import ChatOpenAI    # To interact with OpenAI's large language models (LLMs) in a conversational manner
from langchain.prompts import PromptTemplate    # To create prompt templates
from langchain.chains import RetrievalQA    # To combine the Retriever with the QA chain
from langchain_chroma import Chroma     # Import ChromaDB
import chromadb
import pprint   # To tidy up print output


Use Text

In [3]:
# text information to ingest
texts =  [
    "There is ample evidence to show that the Earth is round.",
    "First, satellite images orbiting the Earth clearly depict our planet's round shape.",
    "Additionally, during a lunar eclipse, the Earth's shadow cast on the Moon is always curved, which can only happen if the Earth is round.",
    "Navigation of ships also provides evidence, as ships moving away from the shore gradually disappear from view bottom first, indicating the Earth's curved surface.",
    "Observing stars from different parts of the world shows that constellations change positions due to the Earth's curvature.",
    "Eratosthenes' ancient experiment measuring the shadow lengths at two different locations in Egypt also provided strong evidence of the Earth's curvature.",
    "If the Earth were flat, the shadow lengths would be the same in both places.",
    "Airplane flights support this fact, as long-distance flight paths often curve rather than follow a straight line to take advantage of the Earth's curvature.",
    "The horizon phenomenon also shows that we cannot see very distant objects because the Earth curves.",
    "GPS satellites that help us navigate can only function optimally if the Earth is round.",
    "Gravity experiments show that gravity pulls towards the center of mass, causing the Earth to be round.",
    "Photos from the Apollo missions that landed on the Moon also show the Earth's round shape from a distance.",
    "Weather observations from satellites show cloud movement and storm patterns consistent with a round Earth.",
    "The light we see at dawn and dusk also indicates the Earth's curvature.",
    "The height of radio towers and antennas is determined by considering the Earth's curvature to optimize signal range.",
    "Experiments using high-flying drones show a curved horizon.",
    "International space missions, like the ISS, also show the Earth as round from low orbit.",
    "The phenomenon of tides is also related to the gravity of a round Earth.",
    "Satellite communication systems orbiting the Earth require coordination that considers the planet's curvature.",
    "Geodesy research, the science of measuring and mapping the Earth, also shows that the Earth is a geoid, or round with slight deviations at the poles.",
    "Moreover, the pattern of day and night distribution around the world is only possible if the Earth is round.",
    "All this evidence consistently supports the fact that our Earth is round."
]

Text Splitter

In [4]:
# Combine all elements in the list into a single string with newline as the separator
combined_text = "\n".join(texts)

# Perform "RecursiveCharacterTextSplitter" so that the data can have an object "page_content"
# This code splits the text into characters separated by "\n", with each character in a separate chunk.
text_splitter = RecursiveCharacterTextSplitter(separators=["\n"], chunk_size=1, chunk_overlap=0)
texts = text_splitter.create_documents([combined_text])

Setup Ollama Agent

In [5]:
# using local ollama server
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="qwen2.5-coder:1.5b",
    base_url="localhost"
)

Create Vector Store

In [6]:
# Create a vector store with a sample text
from langchain_core.vectorstores import InMemoryVectorStore

text = "LangChain is the framework for building context-aware reasoning applications"

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings,
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What is LangChain?")

# show the retrieved document's content
retrieved_documents[0].page_content

'LangChain is the framework for building context-aware reasoning applications'

Embed Text to Local Chroma DB

In [7]:
# using local chroma db
persist_directory = "chroma_db"

# save to text local chroma db
db = Chroma.from_documents(
    documents=texts, embedding=embeddings, persist_directory=persist_directory
)

# load data from local chroma db
db = Chroma(persist_directory = persist_directory, embedding_function=embeddings)

Query the LLM

In [8]:
# perform llm query
query = "Why earth like a ball?"
docs = db.similarity_search(query)
print(docs)

[Document(metadata={}, page_content='\nGravity experiments show that gravity pulls towards the center of mass, causing the Earth to be round.'), Document(metadata={}, page_content='\nMoreover, the pattern of day and night distribution around the world is only possible if the Earth is round.'), Document(metadata={}, page_content='\nGravity experiments show that gravity pulls towards the center of mass, causing the Earth to be round.'), Document(metadata={}, page_content='\nMoreover, the pattern of day and night distribution around the world is only possible if the Earth is round.')]
