## Setup and Import Libraries

In [None]:
import os
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load key/value pairs from a local .env file into the process environment so
# the API keys read in the next cell are available. The value displayed under
# the cell is the call's return value.
load_dotenv()

True

In [3]:
# Verify that the API keys the notebook depends on are present before any
# client is constructed. Writing os.getenv(...) straight back into os.environ
# changes nothing when the key exists and fails with an opaque
# "TypeError: str expected, not NoneType" when it does not, so check
# explicitly and report which keys are missing instead.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "GROQ_API_KEY")

missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Add them to your .env file or export them before running this notebook."
    )

In [4]:
# Model identifiers kept in one place so they are easy to swap.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
CHAT_MODEL_NAME = "groq:openai/gpt-oss-20b"

# Embedding model used to vectorise documents and queries.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)
# Chat model that generates the final answer in the RAG chain.
llm = init_chat_model(model=CHAT_MODEL_NAME)

## Creating an In-Memory Vector Store

In [5]:
# Vector store held in process memory; it embeds text with `embeddings`.
vector_store = InMemoryVectorStore(embeddings)

In [6]:
# (text, source) pairs for the sample corpus. Each pair becomes one Document
# whose metadata records where the text came from.
sample_texts = [
    ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in sample_texts
]

documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [7]:
# Embed and index the sample documents; the displayed list holds the ids
# returned for the stored documents.
vector_store.add_documents(documents)

['88f19987-04d7-4e32-90cb-388e787a6eaf',
 'c13dbeb9-7f5a-44d0-90db-1eae33c96b8f',
 'c0ca0253-53e3-4935-bb30-c81193669e2b',
 '93fa7038-052b-4c4d-ba41-41186c32feff',
 'accc42e7-9350-4a65-a350-8ce9f362eec2',
 '49e4e127-1a4d-44bc-9500-3fb91952942e',
 '19b457a5-a43a-4b23-9ddd-471257658225',
 'efa41ad0-7571-48c4-8532-4b23ca0e4ec4',
 '30aef707-f5cd-4fe1-a9b0-df6bb7e1b2b1',
 'f02819b1-fcf0-4b2b-9860-5862265ca60d']

In [8]:
# Similarity search without an explicit k; the recorded output below lists
# four matches.
query = "Hows the weather forecast"

vector_store.similarity_search(query)

[Document(id='c13dbeb9-7f5a-44d0-90db-1eae33c96b8f', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f02819b1-fcf0-4b2b-9860-5862265ca60d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='30aef707-f5cd-4fe1-a9b0-df6bb7e1b2b1', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='88f19987-04d7-4e32-90cb-388e787a6eaf', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

In [9]:
# Same query, restricted to the two closest matches.
query = "Hows the weather forecast"

vector_store.similarity_search(query, k=2)

[Document(id='c13dbeb9-7f5a-44d0-90db-1eae33c96b8f', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f02819b1-fcf0-4b2b-9860-5862265ca60d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [10]:
# Wrap the vector store as a retriever that returns the top two matches.
retriever_search_kwargs = {"k": 2}
retriever = vector_store.as_retriever(search_kwargs=retriever_search_kwargs)

retriever

VectorStoreRetriever(tags=['InMemoryVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_core.vectorstores.in_memory.InMemoryVectorStore object at 0x0000023E5ABB3B60>, search_kwargs={'k': 2})

In [11]:
# Query through the retriever interface; the result mirrors the k=2 search.
query = "Hows the weather forecast"

retriever.invoke(query)

[Document(id='c13dbeb9-7f5a-44d0-90db-1eae33c96b8f', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='f02819b1-fcf0-4b2b-9860-5862265ca60d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

## Creating the RAG Chain

In [None]:
# Prompt text for the RAG chain: {context} receives the formatted retrieved
# documents and {question} the user's query.
rag_template_text = """Use the following context to answer the question. 
If you don't know the answer based on the context, say you don't know.
Provide specific details from the context to support your answer.

Context:
{context}

Question: {question}

Answer:"""

# Despite the variable name, from_template produces a single human-message
# template (see the repr below), not a system message.
system_prompt = ChatPromptTemplate.from_template(rag_template_text)

system_prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following context to answer the question. \nIf you don't know the answer based on the context, say you don't know.\nProvide specific details from the context to support your answer.\n\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"), additional_kwargs={})])

In [13]:
def format_docs(documents):
    """Return the documents' ``page_content`` strings joined by blank lines.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with each document's content separated by ``"\\n\\n"``;
        an empty string when ``documents`` is empty.
    """
    contents = [document.page_content for document in documents]
    return "\n\n".join(contents)

In [None]:
# Inputs for the prompt: "context" runs the retriever and flattens its
# documents to text, while "question" forwards the caller's input unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: gather inputs -> fill the prompt -> call the model -> plain string.
rag_chain = prompt_inputs | system_prompt | llm | StrOutputParser()

rag_chain

{
  context: VectorStoreRetriever(tags=['InMemoryVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_core.vectorstores.in_memory.InMemoryVectorStore object at 0x0000023E5ABB3B60>, search_kwargs={'k': 2})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following context to answer the question. \nIf you don't know the answer based on the context, say you don't know.\nProvide specific details from the context to support your answer.\n\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000023E5ADA47A0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000023E5ADE3E90>, m

In [15]:
# Run the full chain on the sample query and display the generated answer.
query = "Hows the weather forecast"

response = rag_chain.invoke(input=query)
response

'The forecast for tomorrow is cloudy and overcast, with a high of 62\u202f°F.'

In [None]:
def query_rag(question, max_chars=200):
    """Answer ``question`` with the RAG chain and print the supporting sources.

    Prints the question, the chain's answer, and a preview of each document
    the retriever returns for the same question.

    Args:
        question: Query string passed to both ``rag_chain`` and ``retriever``.
        max_chars: Maximum number of characters of each source document to
            print. Longer content is cut at this length and suffixed with
            "..."; shorter content is printed unchanged.

    Returns:
        None. All results are written to standard output.
    """
    print(f"Question: {question}")
    print("-" * 50)

    answer = rag_chain.invoke(question)
    print(f"Answer: {answer}")

    # The chain only returns the answer text, so the retriever is queried
    # again to show the sources. `invoke` is used instead of the deprecated
    # `get_relevant_documents`, matching the rest of the notebook.
    docs = retriever.invoke(question)
    print("\nSource Documents:")
    for position, doc in enumerate(docs, start=1):
        print(f"\n--- Source {position} ---")
        content = doc.page_content
        # Only mark the preview as truncated when text was actually cut.
        if len(content) > max_chars:
            print(content[:max_chars] + "...")
        else:
            print(content)

In [17]:
# Demonstrate the helper end to end: answer plus the retrieved sources.
question = "Hows the weather forecast"

query_rag(question)

Question: Hows the weather forecast
--------------------------------------------------
Answer: The forecast says tomorrow will be **cloudy and overcast** with a **high of 62 °F**.

Source Documents:

--- Source 1 ---
The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees....

--- Source 2 ---
I have a bad feeling I am going to get deleted :(...
