# LCEL and chains

In [1]:
# Auto-reload imported modules before each cell executes, so edits to local
# project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain_mistralai import  ChatMistralAI
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from openai import OpenAI

from src import utils, conf

# Params

In [3]:
# Load the notebook settings from the YAML file through the project's `conf` helper.
conf_settings = conf.load(file="settings.yaml")

# Names read from the settings. LLM_WORKHORSE is passed as the `model` argument
# of the OpenAI clients further down; LLM_FLAGSHIP and EMBEDDINGS are not used
# in the cells visible here.
LLM_WORKHORSE = conf_settings.llm_workhorse
LLM_FLAGSHIP = conf_settings.llm_flagship
EMBEDDINGS = conf_settings.embeddings

# Environment Variables

In [5]:
# Populate os.environ from a local .env file, if python-dotenv finds one.
load_dotenv()

# Subscript access (rather than .get) raises KeyError right here when the key
# is missing, instead of failing later inside an API call.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# What is a Langchain chain

A chain is a composition element that lets you build a structured pipeline for generative AI tasks, especially (but not only) for RAG.


LangChain chains are built (in version 1.x or above) using LCEL (LangChain Expression Language)

Its core principles are: composability, streaming, async, parallelism



## ChatPromptTemplate

In [6]:
# Chat history as (role, template) tuples; `{question}` is the only template
# variable and is filled in at invoke time.
lst_chat_hist = [
    ("system", "You are a helpful assistant."),
    ("user", "{question}"),
]

# Build the prompt from the list above instead of repeating the same messages inline.
prompt = ChatPromptTemplate.from_messages(lst_chat_hist)

# A bare string is accepted here because the template has a single input variable.
prompt.invoke("Can you tell me the distance from the Earth to the Moon?")

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Can you tell me the distance from the Earth to the Moon?', additional_kwargs={}, response_metadata={})])

In [7]:
# Single-message template: the string becomes one human message with a `topic` variable.
prompt = ChatPromptTemplate.from_template(
    template="Tell me an interesting fact about {topic}",
)

# format_prompt receives the template variables as keyword arguments.
prompt.format_prompt(topic="Devops")

ChatPromptValue(messages=[HumanMessage(content='Tell me an interesting fact about Devops', additional_kwargs={}, response_metadata={})])

In [8]:
# Same result as format_prompt, but through the Runnable interface: variables go in a dict.
prompt.invoke(input={"topic": "Devops"})

ChatPromptValue(messages=[HumanMessage(content='Tell me an interesting fact about Devops', additional_kwargs={}, response_metadata={})])

## FewShotChatMessagePromptTemplate

In [9]:
# 1. Define examples: each dict supplies the `input`/`output` variables of the
#    example prompt below.
examples = [
    {
        "input": "Q: What is LangChain?",
        "output": "A: LangChain is a framework for building applications powered by large language models (LLMs).",
    },
    {
        "input": "Q: What is LCEL?",
        "output": "A: LCEL (LangChain Expression Language) is a way to build chains using composable operators like | for clarity and power.",
    },
]

# 2. Create an example prompt template: one human/ai message pair per example.
example_prompt = ChatPromptTemplate.from_messages([
    ("human", "{input}"),
    ("ai", "{output}"),
])

# 3. Few-shot wrapper: renders every example through the example prompt.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)

# 4. Final prompt template (instructions + few-shots + new user question).
#    The system text is assembled from adjacent string literals: a backslash
#    continuation inside the string would carry the next line's indentation
#    into the prompt as a run of stray spaces.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a concise AI assistant. Answer clearly. "
        "The answer style should be like the following examples:",
    ),
    few_shot_prompt,
    ("human", "{question}"),
])

In [10]:
# Render the first example on its own: one human message followed by one AI message.
example_prompt.invoke(input=examples[0])

ChatPromptValue(messages=[HumanMessage(content='Q: What is LangChain?', additional_kwargs={}, response_metadata={}), AIMessage(content='A: LangChain is a framework for building applications powered by large language models (LLMs).', additional_kwargs={}, response_metadata={})])

In [11]:
# The few-shot block needs no input variables; it expands to the example messages in order.
few_shot_prompt.format_prompt().to_messages()

[HumanMessage(content='Q: What is LangChain?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='A: LangChain is a framework for building applications powered by large language models (LLMs).', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Q: What is LCEL?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='A: LCEL (LangChain Expression Language) is a way to build chains using composable operators like | for clarity and power.', additional_kwargs={}, response_metadata={})]

In [12]:
# Full prompt: system instructions, the few-shot examples, then the new question.
final_prompt.invoke(input="What is langgraph?").to_messages()

[SystemMessage(content='You are a concise AI assistant. Answer clearly.     The answer style should be like the following examples:', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Q: What is LangChain?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='A: LangChain is a framework for building applications powered by large language models (LLMs).', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Q: What is LCEL?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='A: LCEL (LangChain Expression Language) is a way to build chains using composable operators like | for clarity and power.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What is langgraph?', additional_kwargs={}, response_metadata={})]

# LLMs

In [14]:
# Call the OpenAI Responses API directly through the official SDK, without LangChain.
# `OpenAI` is imported in the import cell at the top of the notebook.
client_openai = OpenAI(api_key=OPENAI_API_KEY)

response = client_openai.responses.create(
    model=LLM_WORKHORSE,
    input="Tell me a joke about devops",
    temperature=0.2,
    max_output_tokens=128,
)

# Print only the text of the response.
print(response.output_text)


Sure! Here's a DevOps joke for you:

Why do DevOps engineers never get lost?

Because they always follow the *pipeline*! 😄


In [15]:
# LangChain chat model for the workhorse model. No temperature is passed here,
# so the default applies; each completion is capped at 128 tokens.
llm = ChatOpenAI(model=LLM_WORKHORSE, max_tokens=128)

# How to call the LLM?

In [16]:
# Second chat model, served by Mistral. The model name is hard-coded here rather
# than read from the loaded settings like the OpenAI model names.
# NOTE(review): no API key is passed, so the client presumably reads it from the
# environment (e.g. a MISTRAL_API_KEY loaded by load_dotenv) — confirm.
llm2 = ChatMistralAI(model="mistral-medium-2508")

# How to call the LLM?

## Calling a Chain

**invoke (synchronous single input)**

* Runs the chain once, blocking until it finishes.
* Input = single dict or string (depending on your chain).
* Output = single result.

✅ Use when you just need one response and don’t care about concurrency.

In [18]:
# Blocking call: returns once the whole completion is available.
result = llm.invoke(input="What is LangChain?")

# Printing the message shows its content followed by the response metadata.
print(result)

content='LangChain is a framework designed to simplify the development of applications that use large language models (LLMs). It provides tools and abstractions for building language model-powered apps that can integrate with various data sources, manage conversations, handle memory, execute chains of prompts, and connect to external APIs and knowledge bases.\n\nKey features of LangChain include:\n\n- **Prompt Management:** Helps structure and manage prompts for better and more consistent outputs from LLMs.\n- **Chains:** Allows developers to create sequences of calls to language models or other components, enabling complex workflows.\n- **Memory:** Maintains context across interactions, useful for chatbots and conversational agents' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 128, 'prompt_tokens': 12, 'total_tokens': 140, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_pr

**ainvoke (asynchronous single input)**

* Async version of invoke.
* Returns a coroutine → you must await it (inside async def).
* Non-blocking → allows parallel I/O (important for web apps, APIs).

✅ Use when building async applications (FastAPI, Streamlit, etc.) or when you want multiple requests in parallel.

```python
import asyncio

async def main():
    """Send one question to the chat model without blocking the event loop."""
    # A chat model takes the prompt itself (a string, a list of messages or a
    # prompt value). A {"question": ...} dict is only valid input for a chain
    # that starts with a prompt template.
    result = await llm.ainvoke("What is LCEL?")
    print(result)

# Entry point for a standalone script. Inside a notebook an event loop is
# already running, so use `await main()` there instead.
asyncio.run(main())
```

**batch (synchronous multiple inputs)**

* Run the chain on a list of inputs (e.g., multiple questions).
* By default the inputs are run in parallel on a thread pool; `max_concurrency` in the config caps how many run at once.
* Returns a list of results in the same order.

✅ Use when you have a list of tasks and don’t need async.

In [20]:
# Three independent prompts answered in a single batch call.
questions = ["What is LangChain?", "What is LCEL?", "What is a vector database?"]

# max_concurrency caps how many of the requests may run at the same time.
results = llm.batch(questions, config=RunnableConfig(max_concurrency=10))

# Print the answers in the order they were returned.
for answer in results:
    print(answer)

content='LangChain is a framework designed to simplify the development of applications that use large language models (LLMs). It provides tools and abstractions to help developers build complex workflows by integrating LLMs with other components such as data sources, APIs, and external tools.\n\nKey features of LangChain include:\n\n- ** chaining components:** Easily link together calls to LLMs and other utilities in a defined sequence, enabling complex multi-step processes.\n- **Prompt management:** Tools to create, manage, and optimize prompts for better LLM performance.\n- **Memory:** Maintain state or context across interactions, useful for conversational agents.\n- **Integration with external data' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 128, 'prompt_tokens': 12, 'total_tokens': 140, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_t

**There is also:**
* abatch → async version of batch.

**stream (synchronous streaming)** 
* Instead of waiting for the entire response, you get tokens/chunks as they arrive.
* Great for CLI apps or cases where you want immediate output.


```python
# Streaming call: `chain` stands for an LCEL pipeline whose prompt takes a `question` variable.
# NOTE(review): no `chain` is defined in the visible cells (the pipeline built later is
# named `chat` and expects `topic`) — confirm what this snippet refers to.
# Each chunk is printed as soon as it arrives; end="" and flush=True keep the pieces on one line.
for chunk in chain.stream({"question": "Explain LangChain Expression Language in simple terms."}):
    print(chunk, end="", flush=True)
```

Here, results are returned token by token as they are generated, and the application blocks until the stream ends. It is useful when developing a CLI.

**astream (asynchronous streaming)**
* Same as stream, but async-friendly.
* Perfect for web apps (FastAPI, Streamlit, etc.) where you want token-by-token output without blocking the application.

```python 
import asyncio

async def main():
    """Stream the chain's answer chunk by chunk, then print a closing marker."""
    # `async for` hands control back to the event loop while waiting for the next chunk.
    async for chunk in chain.astream({"question": "Give me a short poem about LCEL."}):
        print(chunk, end="", flush=True)

    print("\n---\nDone!")

# Entry point for a standalone script. In a notebook, where an event loop is
# already running, call `await main()` instead.
asyncio.run(main())
```


| Method    | Input       | Output style              | Use case                   |
| --------- | ----------- | ------------------------- | -------------------------- |
| `invoke`  | 1 input     | 1 final result            | Simple calls               |
| `ainvoke` | 1 input     | 1 final result            | Async apps                 |
| `batch`   | many inputs | list of results           | Bulk jobs                  |
| `abatch`  | many inputs | list of results           | Async bulk                 |
| `stream`  | 1 input     | generator of chunks       | CLI / sync streaming       |
| `astream` | 1 input     | async generator of chunks | Web apps / async streaming |


# Chaining:

* Chaining means linking multiple components (prompt templates, LLMs, output parsers, retrievers, tools, etc.) together into a pipeline.
* The pipe operator (|) is the heart of LCEL — it lets you compose these components like LEGO blocks.
* Each component is a Runnable (anything that can accept input and produce output).

In [21]:
# Prompt with a single `topic` variable.
prompt = ChatPromptTemplate.from_template("Tell me an interesting fact about {topic}")

# Chat model with a low temperature and a 128-token cap on the completion.
llm = ChatOpenAI(model=LLM_WORKHORSE, temperature=0.2, max_tokens=128)

# The pipe operator feeds the formatted prompt into the model.
chat = prompt | llm

# The bare string is bound to the prompt's only variable, `topic`.
chat.invoke("Roman Empire")

AIMessage(content='An interesting fact about the Roman Empire is that it had an extensive and sophisticated network of roads—over 250,000 miles at its peak! These roads were so well constructed that some of them are still in use today. The phrase "All roads lead to Rome" comes from this impressive infrastructure, which helped the Romans efficiently manage their vast empire by facilitating trade, military movement, and communication.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 79, 'prompt_tokens': 15, 'total_tokens': 94, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_a150906e27', 'id': 'chatcmpl-CEivPuKevKuh0oEZw5Pk5ZQQqYH1T', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--31c8265b-

In [22]:
# Explicit form: the prompt variables are passed as a dict keyed by variable name.
chat.invoke({"topic": "Roman Empire"})

AIMessage(content='An interesting fact about the Roman Empire is that they built an extensive network of roads—over 250,000 miles at its peak! These roads were so well constructed that many of them are still in use today, forming the basis for modern European road systems. The famous phrase "All roads lead to Rome" reflects how central and connected the city was within this vast network.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 75, 'prompt_tokens': 15, 'total_tokens': 90, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_a150906e27', 'id': 'chatcmpl-CEivR6zrUpPFyjVY8As6LYPvhKxR8', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--aa641c7b-33d9-4171-9ea8-267fb2889951-0', usage_metadata