In [1]:
#%pip install -qU langchain langchain-ollama langchain-text-splitters

### RAG Pipeline with langchain_ollama

In [3]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.embeddings import FakeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
# Step 1: Initialize the Ollama chat model used by every later cell.
# temperature=0.7 means sampled output, so responses vary from run to run.
llm = ChatOllama(model="qwen2.5:7b", temperature=0.7)

In [5]:
# Step 2: Prepare Documents
# A tiny in-memory corpus: one sentence per entry. These strings are what the
# retriever will later hand to the model as context.
documents = [
    "LangChain is a framework for building applications with LLMs.",
    "It provides tools for agents, memory, and retrieval-augmented generation.",
    "Ollama lets you run open-source models locally on your machine.",
    "Python is a versatile programming language for AI and data science.",
]

In [6]:
# Step 3: Split Documents
# The corpus is joined into one newline-separated string and then cut into
# chunks of at most 100 characters, with up to 20 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=100)
chunks = text_splitter.split_text("\n".join(documents))

In [7]:
# Step 4: Create Vector Store
# NOTE(review): FakeEmbeddings is a testing stub, not a real embedding model,
# so similarity search over it is not semantically meaningful. Swap in a real
# embedding model before judging retrieval quality.
embeddings = FakeEmbeddings(size=256, model_name="fake-embedding")
vectorstore = InMemoryVectorStore.from_texts(chunks, embeddings)
# The number of documents to return must be passed through `search_kwargs`.
# A bare `k=2` keyword is not a retriever setting and leaves the default in place.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [8]:
# Step 5: Define RAG Prompt
# Two-message template: the system message carries the retrieved {context},
# the human message carries the user's {query}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer based on the context.\n\n"
            "Context: {context}",
        ),
        ("human", "{query}"),
    ]
)

In [9]:
# Step 6: Simple RAG Function
def retrieve_and_respond(query: str) -> str:
    """Answer a single question with retrieval-augmented generation.

    Relies on the notebook-level ``retriever``, ``prompt`` and ``llm`` objects.

    Args:
        query: The user's question; also used as the retrieval query.

    Returns:
        The ``content`` of the model's response.
    """
    # Retrieve -> join page contents into one context string
    retrieved = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in retrieved)

    # Fill the template and hand the resulting messages to the model
    return llm.invoke(prompt.format_messages(context=context, query=query)).content

In [10]:
# Step 7: Multi-turn Conversation
class ConversationPipeline:
    """RAG chat session that carries earlier turns into each new request.

    Args:
        llm: Object with ``invoke(messages)`` returning something with ``.content``.
        retriever: Object with ``invoke(query)`` returning items with ``.page_content``.
        prompt: Object with ``format_messages(context=..., query=...)`` returning
            the list of messages for the current turn.

    Attributes:
        chat_history: One ``{"user": ..., "assistant": ...}`` dict per completed
            turn, oldest first.
    """

    def __init__(self, llm, retriever, prompt):
        self.llm = llm
        self.retriever = retriever
        self.prompt = prompt
        self.chat_history = []

    def _history_messages(self) -> list:
        """Flatten ``chat_history`` into alternating (role, content) pairs."""
        history = []
        for turn in self.chat_history:
            history.append(("human", turn["user"]))
            history.append(("ai", turn["assistant"]))
        return history

    def chat(self, user_query: str) -> str:
        """Run one conversation turn and record it in ``chat_history``.

        Args:
            user_query: The user's message; also used as the retrieval query.

        Returns:
            The ``content`` of the model's response for this turn.
        """
        context_docs = self.retriever.invoke(user_query)
        context = "\n".join(doc.page_content for doc in context_docs)

        # Use the prompt injected in __init__, not whatever global `prompt`
        # happens to exist in the notebook.
        current = list(self.prompt.format_messages(context=context, query=user_query))

        # Splice earlier turns in just before the final message so the model sees
        # system/context -> past turns -> current question. This assumes the
        # template's last message is the user's query.
        messages = current[:-1] + self._history_messages() + current[-1:]

        response = self.llm.invoke(messages)
        response_text = response.content

        # Store in history only after a successful call
        self.chat_history.append({"user": user_query, "assistant": response_text})

        return response_text

In [11]:
# Step 8: Run Pipeline
if __name__ == "__main__":
    # Single question through the stateless helper
    print("=== Single-turn RAG ===")
    result = retrieve_and_respond("What is LangChain?")
    print(f"Response: {result}\n")

    # The same pipeline objects, driven through the conversation wrapper
    print("=== Multi-turn Conversation ===")
    conversation = ConversationPipeline(llm, retriever, prompt)
    queries = [
        "What is LangChain?",
        "Can I use it with Python?",
        "How does it relate to Ollama?"
    ]

    for query in queries:
        response = conversation.chat(query)
        # One print call; output is identical to printing the two lines separately
        print(f"User: {query}", f"Assistant: {response}\n", sep="\n")

=== Single-turn RAG ===
Response: LangChain is a framework designed to help build applications that utilize large language models (LLMs). It provides tools, including support for agents, memory systems, and retrieval-augmented generation techniques, which can enhance the functionality and utility of LLMs in various application scenarios. This framework aims to make it easier for developers to integrate advanced natural language processing capabilities into their projects.

=== Multi-turn Conversation ===
User: What is LangChain?
Assistant: LangChain is a framework designed to help developers build applications that utilize large language models (LLMs). It provides several key tools:

1. **Agents**: These are like assistants or automations within your application that can interact with LLMs.
2. **Memory**: This component helps maintain context and history, allowing the system to have more meaningful interactions over time.
3. **Retrieval-Augmented Generation (RAG)**: This technique comb

### With Tools (Function Calling)

In [12]:
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage

# The docstring below is required by @tool: it becomes the tool description
# given to the LLM, so editing it changes what the model is told about the tool.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the result."""
    return a * b

# Expose `multiply` to the model so it can request a call to it
llm_with_tools = llm.bind_tools([multiply])

# Ask model
ai_msg = llm_with_tools.invoke([HumanMessage("What is 5 times 3?")])

# Execute tool
# The model may answer directly instead of requesting a tool; `tool_calls` is
# then empty, and indexing [0] would raise IndexError.
if ai_msg.tool_calls:
    tool_call = ai_msg.tool_calls[0]
    result = multiply.invoke(tool_call["args"])
    print("Tool result:", result)
else:
    print("Model answered without calling a tool:", ai_msg.content)


Tool result: 15


### Streaming

In [14]:
# Print the reply piece by piece as it arrives instead of waiting for the
# complete message.
print("=== Streaming ===")
stream = llm.stream("Tell me about LangChain")
for chunk in stream:
    print(chunk.content, end="", flush=True)
print()  # finish the streamed line with a newline

=== Streaming ===
LangChain is an open-source library for building and running language models in production. It was created with the goal of making it easier to integrate large pre-trained language models into various applications. Here's a brief overview of what LangChain involves:

### Key Features

1. **Modular Design**: LangChain provides a modular approach, allowing developers to easily plug in different components such as model servers, tokenizers, and data preprocessors.
2. **Integration with Models**: It supports integration with popular large language models (LLMs) like Anthropic CLaude, LLaMA, MPT, and others through various APIs and connectors.
3. **Versatility**: LangChain can be used for a wide range of applications, including chatbots, text summarization, code generation, and more.
4. **Production Readiness**: The library is designed to handle production-scale deployments with features like rate limiting, model versioning, and logging.

### Usage

Here's a basic example 