## Setup and Import Libraries

In [1]:
import os
from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_astradb import AstraDBVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore')

In [2]:
load_dotenv()

True

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["ASTRA_DB_APPLICATION_TOKEN"] = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
os.environ["ASTRA_DB_ID"] = os.getenv("ASTRA_DB_ID")
os.environ["ASTRA_DB_API_ENDPOINT"] = os.getenv("ASTRA_DB_API_ENDPOINT")

In [4]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
llm = init_chat_model(model="groq:openai/gpt-oss-20b")

## Create AstraDB

In [5]:
vector_store = AstraDBVectorStore(
    embedding=embeddings,
    collection_name="astra_vector_langchain",
    namespace=None,
)

vector_store

<langchain_astradb.vectorstores.AstraDBVectorStore at 0x2413e13bfe0>

In [6]:
documents = [
    Document(
        page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
        metadata={"source": "tweet"},
    ),

    Document(
        page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        metadata={"source": "news"},
    ),

    Document(
        page_content="Building an exciting new project with LangChain - come check it out!",
        metadata={"source": "tweet"},
    ),

    Document(
        page_content="Robbers broke into the city bank and stole $1 million in cash.",
        metadata={"source": "news"},
    ),

    Document(
        page_content="Wow! That was an amazing movie. I can't wait to see it again.",
        metadata={"source": "tweet"},
    ),

    Document(
        page_content="Is the new iPhone worth the price? Read this review to find out.",
        metadata={"source": "website"},
    ),

    Document(
        page_content="The top 10 soccer players in the world right now.",
        metadata={"source": "website"},
    ),

    Document(
        page_content="LangGraph is the best framework for building stateful, agentic applications!",
        metadata={"source": "tweet"},
    ),

    Document(
        page_content="The stock market is down 500 points today due to fears of a recession.",
        metadata={"source": "news"},
    ),

    Document(
        page_content="I have a bad feeling I am going to get deleted :(",
        metadata={"source": "tweet"},
    ),
]

documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [7]:
vector_store.add_documents(documents=documents)

['88f9ab1f67794d32ac30a8bed12ab875',
 'eaf8d63cf7064af088ea8098d27d9dc9',
 '2f2bb050c7e14c3db5ef5d4abba8e8eb',
 '092b3e9d3ec946eeac989225dd97e0c5',
 'f0dce50855cc44b39fab02f38cc19bc1',
 '77585976929041aaa7e7a1c05505260d',
 '3657e5d09961490387f85d66cd7e78ec',
 '2a60e2bc86524a4bb5b3b998c4b6f211',
 '22360522ecdd4160949eb23b8bfcca0c',
 'a22ac18f408441eab7ed768c7d965d93']

## Performing Similarity

In [8]:
query = "What is the weather"

vector_store.similarity_search(query=query)

[Document(id='eaf8d63cf7064af088ea8098d27d9dc9', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='a22ac18f408441eab7ed768c7d965d93', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='22360522ecdd4160949eb23b8bfcca0c', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='3657e5d09961490387f85d66cd7e78ec', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]

In [9]:
query = "LangChain provides abstractions to make working with LLMs easy"

results = vector_store.similarity_search(
    query=query,
    k=3,
    filter={"source": "tweet"},
)

for result in results:
    print(f'* "{result.page_content}", metadata={result.metadata}')

* "Building an exciting new project with LangChain - come check it out!", metadata={'source': 'tweet'}
* "LangGraph is the best framework for building stateful, agentic applications!", metadata={'source': 'tweet'}
* "Wow! That was an amazing movie. I can't wait to see it again.", metadata={'source': 'tweet'}


In [10]:
query = "LangChain provides abstractions to make working with LLMs easy"

results = vector_store.similarity_search_with_score(
    query=query,
    k=3,
    filter={"source": "tweet"},
)

for result, score in results:
    print(f'* [Similarity={score:.2f}] "{result.page_content}", metadata={result.metadata}')

* [Similarity=0.71] "Building an exciting new project with LangChain - come check it out!", metadata={'source': 'tweet'}
* [Similarity=0.70] "LangGraph is the best framework for building stateful, agentic applications!", metadata={'source': 'tweet'}
* [Similarity=0.52] "Wow! That was an amazing movie. I can't wait to see it again.", metadata={'source': 'tweet'}


In [12]:
query = "Stealing from the bank is a crime"

retriever=vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1},
)

retriever.invoke(input=query, filter={"source": "news"})

[Document(id='092b3e9d3ec946eeac989225dd97e0c5', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

In [13]:
query = "Stealing from the bank is a crime"

retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)

retriever.invoke(input=query, filter={"source": "news"})

[Document(id='092b3e9d3ec946eeac989225dd97e0c5', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

## Building RAG Chain

In [14]:
system_prompt = ChatPromptTemplate.from_template("""Use the following context to answer the question. 
If you don't know the answer based on the context, say you don't know.
Provide specific details from the context to support your answer.

Context:
{context}

Question: {question}

Answer:""")

system_prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following context to answer the question. \nIf you don't know the answer based on the context, say you don't know.\nProvide specific details from the context to support your answer.\n\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"), additional_kwargs={})])

In [15]:
def format_docs(documents):
    return "\n\n".join(document.page_content for document in documents)

In [16]:
rag_chain = ({ 
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | system_prompt
    | llm
    | StrOutputParser()
)

rag_chain

{
  context: VectorStoreRetriever(tags=['AstraDBVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_astradb.vectorstores.AstraDBVectorStore object at 0x000002413E13BFE0>, search_type='similarity_score_threshold', search_kwargs={'k': 1, 'score_threshold': 0.5})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following context to answer the question. \nIf you don't know the answer based on the context, say you don't know.\nProvide specific details from the context to support your answer.\n\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002413E10CEF0>, async_client=<groq.resources.chat.comp

In [17]:
query = "Stealing from the bank is a crime"

response= rag_chain.invoke(query)
response

'Yes. The context states that robbers “broke into the city bank and stole $1\u202fmillion in cash,” which is a clear example of robbery—a criminal act. Thus, stealing from a bank is indeed a crime.'

In [18]:
def query_rag(question):
    print(f"Question: {question}")
    print("-" * 50)
    
    answer = rag_chain.invoke(question)
    print(f"Answer: {answer}")
    
    # Get source documents separately if needed
    docs = retriever.get_relevant_documents(question)
    print("\nSource Documents:")
    for i, doc in enumerate(docs):
        print(f"\n--- Source {i+1} ---")
        print(doc.page_content[:200] + "...")

In [19]:
question = "Stealing from the bank is a crime"

query_rag(question=question)

Question: Stealing from the bank is a crime
--------------------------------------------------
Answer: Yes.  
The context describes robbers breaking into the city bank and stealing $1 million in cash, which is an act of theft and therefore a criminal offense.

Source Documents:

--- Source 1 ---
Robbers broke into the city bank and stole $1 million in cash....
