## Setup and Import Libraries

In [None]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap
from dotenv import load_dotenv

import warnings
warnings.filterwarnings("ignore")

In [None]:
load_dotenv()


True

In [3]:
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["HUGGINGFACE_API_KEY"] = os.getenv("HUGGINGFACE_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [4]:
embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
# llm = init_chat_model(model="groq:gemma2-9b-it")
llm = init_chat_model(model="openai:o4-mini")

## Document Loading and Splitting

In [5]:
loader = TextLoader(file_path="langchain_crewai_dataset.txt")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=50
)

chunks = text_splitter.split_documents(documents=documents)

chunks

[Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='LangChain is an open-source framework designed for developing applications powered by large language models (LLMs). It simplifies the process of building, managing, and scaling complex chains of thought by abstracting prompt management, retrieval, memory, and agent orchestration. Developers can use'),
 Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='and agent orchestration. Developers can use LangChain to create end-to-end pipelines that connect LLMs with tools, APIs, vector databases, and other knowledge sources. (v1)'),
 Document(metadata={'source': 'langchain_crewai_dataset.txt'}, page_content='At the heart of LangChain lies the concept of chains, which are sequences of calls to LLMs and other tools. Chains can be simple, such as a single prompt fed to an LLM, or complex, involving multiple conditionally executed steps. LangChain makes it easy to compose and reuse chains using st

## Create Vector Store and Retriever

In [6]:
vector_store = FAISS.from_documents(
    documents=chunks, 
    embedding=embeddings
)

retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k":5}
)

retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000027E4A9A1E80>, search_type='mmr', search_kwargs={'k': 5})

## Query Expansion Prompt and Chain

In [7]:
query_expansion_prompt = PromptTemplate.from_template("""
You are a helpful assistant. Expand the following query to improve document retrieval 
by adding relevant synonyms, technical terms, and useful context.

Original query: "{query}"

Expanded query:
""")

query_expansion_prompt

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval \nby adding relevant synonyms, technical terms, and useful context.\n\nOriginal query: "{query}"\n\nExpanded query:\n')

In [8]:
query_expansion_chain = query_expansion_prompt | llm | StrOutputParser()

query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval \nby adding relevant synonyms, technical terms, and useful context.\n\nOriginal query: "{query}"\n\nExpanded query:\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000027E47B1EEA0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000027E4822B260>, root_client=<openai.OpenAI object at 0x0000027E47160F20>, root_async_client=<openai.AsyncOpenAI object at 0x0000027E47C41340>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [9]:
query = "Langchain memory"

query_expansion_chain.invoke({"query":query})

'Expanded query:  \n“LangChain memory” OR “LangChain memory management” OR “LangChain memory modules” OR “LangChain memory primitives” OR “conversation memory” OR “conversation buffer memory” OR “persistent memory backend” OR “ephemeral memory” OR “long-term memory” OR “memory retrieval” OR “vector-based memory store” OR “RAG (retrieval-augmented generation) pipelines” OR “context window management” OR “memory chain components” OR “LLM memory augmentation” OR “Python LangChain framework” OR “conversational AI memory” OR “chatbot memory strategies” OR “BaseMemory” OR “ConversationBufferMemory” OR “ConversationSummaryMemory” OR “memory backends Redis Chroma Qdrant Weaviate ElasticSearch SQL”'

## RAG Prompt and Chain

In [10]:
final_prompt = PromptTemplate.from_template("""
Answer the question based on the context below.

Context:
{context}

Question: {input}
""")

final_prompt

PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')

In [11]:
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=final_prompt
)

document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000027E47B1EEA0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000027E4822B260>, root_client=<openai.OpenAI object at 0x0000027E47160F20>, root_async_client=<openai.AsyncOpenAI object at 0x0000027E47C41340>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

## Full RAG pipeline with query expansion

In [12]:
rag_pipeline = (
    RunnableMap(
        {
            "input": lambda x: x["input"],
            "context": lambda x: retriever.invoke(query_expansion_chain.invoke({"query": x["input"]}))
        }
    )
    | document_chain
)

rag_pipeline

{
  input: RunnableLambda(...),
  context: RunnableLambda(...)
}
| RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
    context: RunnableLambda(format_docs)
  }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
  | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')
  | ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000027E47B1EEA0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000027E4822B260>, root_client=<openai.OpenAI object at 0x0000027E47160F20>, root_async_client=<openai.AsyncOpenAI object at 0x0000027E47C41340>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
  | StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

## Run Query

In [13]:
query = "What types of memory does LangChain support?"

print(query_expansion_chain.invoke({"query": {"input":query}}))

Expanded query:

“LangChain memory support” OR “LangChain memory modules” OR “LangChain memory backends” OR “memory strategies in LangChain” OR “session memory management” OR “state management in LangChain” OR “conversation buffer memory” OR “chat memory” OR “short-term memory” OR “long-term memory” OR “ephemeral memory” OR “persistent memory” OR “summary memory” OR “vector memory” OR “external vectorstore” OR “RedisMemory” OR “MongoDBMemory” OR “SQLDatabaseMemory” OR “memory store” OR “memory architecture for LLM agents”


In [15]:
response = rag_pipeline.invoke(input={"input":query})

print("✅ Answer:\n", response)

✅ Answer:
 LangChain’s built-in conversational memories fall into two main categories:

  1. ConversationBufferMemory  
     – Keeps the full dialogue history in memory so the LLM can “see” all prior turns.  

  2. ConversationSummaryMemory  
     – Periodically summarizes the chat history into a concise form, letting you preserve context while staying within token limits.


In [16]:
query = "CrewAI agents?"

print(query_expansion_chain.invoke({"query": {"input":query}}))

Expanded query:

("CrewAI" OR "Crew AI" OR CrewAI-based) AND (agent OR agents OR “autonomous agent” OR “intelligent agent” OR “multi-agent system” OR “agent-based modeling” OR “collaborative AI” OR “AI-driven crew member”)  
AND (“task allocation” OR orchestration OR scheduling OR “workflow automation” OR “decision support” OR “real-time coordination” OR “mission planning”)  
AND (“reinforcement learning” OR “multi-agent reinforcement learning” OR “deep learning” OR “machine learning” OR “neural network” OR RPA OR “robotic process automation”)  
AND (“human-AI teaming” OR “human-machine collaboration” OR “workforce augmentation” OR “crew management” OR “crew scheduling” OR “crew operations”)  
AND (maritime OR aerospace OR aviation OR space OR logistics OR manufacturing OR warehousing OR “mission-critical operations”)  

Key synonyms & terms added:  
• autonomous agents, intelligent agents, multi-agent systems  
• agent-based modeling, collaborative robots (cobots)  
• reinforcement le

In [17]:
response = rag_pipeline.invoke(input={"input":query})

print("✅ Answer:\n", response)

✅ Answer:
 CrewAI agents are purpose‐driven autonomous workers that collaborate in a structured workflow.  Each agent is defined by:  
• A specific role (e.g., researcher, planner, executor)  
• A clear purpose and goal within the overall mission  
• A tailored toolset they may draw on to perform their tasks  

They operate semi-independently—staying on task, using their tools, and coordinating with fellow agents—so that the crew as a whole can efficiently reach its objective.
