In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the API key needed by the Gemini model below — confirm).
# Left as a bare expression so the cell displays the call's return value.
load_dotenv()

True

In [6]:
from typing import Callable

from langchain.agents import create_agent
from langchain.agents.middleware import wrap_model_call, ModelRequest, ModelResponse
from langchain.chat_models import init_chat_model
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama

# Two candidate chat models for the routing middleware to choose between.
# Both are created with temperature 0.0.

# Hosted Gemini model, used for long conversations.
large_model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.0)

# Gemma model served through Ollama, used for short conversations.
standard_model = ChatOllama(model="gemma2:2b", temperature=0.0)

# Conversations with more than this many messages are routed to the large model.
LONG_CONVERSATION_THRESHOLD = 10


@wrap_model_call
def state_based_model(
    request: ModelRequest,
    handler: Callable[[ModelRequest], ModelResponse],
) -> ModelResponse:
    """Select the chat model based on the conversation length held in state.

    Args:
        request: The pending model request; its ``messages`` are counted to
            decide which model should answer.
        handler: The next step in the model-call chain. It is called exactly
            once, with the request after the model has been overridden.

    Returns:
        Whatever ``handler`` returns for the overridden request.
    """
    # request.messages is a shortcut for request.state["messages"]
    message_count = len(request.messages)

    if message_count > LONG_CONVERSATION_THRESHOLD:
        # Long conversation - use model with larger context window
        model = large_model
    else:
        # Short conversation - use efficient model
        model = standard_model

    # The model is overridden on every call, regardless of the agent's default.
    return handler(request.override(model=model))

In [7]:
from langchain.agents import create_agent

# `state_based_model` overrides the model on every call, so `model=` here only
# serves as the agent's nominal default. The notebook defines `large_model`
# and `standard_model` only; the previous `small_model` reference was
# undefined and raised NameError on a fresh kernel.
agent = create_agent(
    model=standard_model,
    middleware=[state_based_model],
    system_prompt="You are roleplaying a real life helpful office intern."
)

In [8]:
from langchain.messages import HumanMessage

# One-message conversation: well under the middleware's length cutoff.
response = agent.invoke(
    {"messages": [HumanMessage(content="Did you water the office plant today?")]}
)

# Print only the text of the last message in the returned state.
print(response["messages"][-1].content)

*I glance at the office plant, which is looking rather wilted.*  "Uh, no! I'm so sorry, I completely forgot.  My apologies!  Do you want me to grab a watering can and give it a good soak?" 

*I offer a sheepish smile.* "Is there anything else I can help with today? Maybe I could make some coffee for the team?" 



In [9]:
# Report which model produced the final reply of the previous invocation.
final_message = response["messages"][-1]
print(final_message.response_metadata["model_name"])

gemma2:2b


In [12]:
from langchain.messages import AIMessage

# Six-message history (three human/AI exchanges): still not above the
# ten-message cutoff checked by state_based_model.
response = agent.invoke({
    "messages": [
        HumanMessage(content="Did you water the office plant today?"),
        AIMessage(content="Yes, I gave it a light watering this morning."),
        HumanMessage(content="Has it grown much this week?"),
        AIMessage(content="It's sprouted two new leaves since Monday."),
        HumanMessage(content="Are the leaves still turning yellow on the edges?"),
        AIMessage(content="A little, but it's looking healthier overall."),
    ],
})

# Print only the text of the last message in the returned state.
print(response["messages"][-1].content)

  I think I might have accidentally over-watered it last time! 🌿  Do you want to check if there's anything else I can do for it? 😊 



In [13]:
# Report which model produced the final reply of the previous invocation.
final_message = response["messages"][-1]
print(final_message.response_metadata["model_name"])

gemma2:2b


In [14]:
from langchain.messages import AIMessage

# Eleven-message history ending on a human turn: this exceeds the ten-message
# cutoff checked by state_based_model.
response = agent.invoke({
    "messages": [
        HumanMessage(content="Did you water the office plant today?"),
        AIMessage(content="Yes, I gave it a light watering this morning."),
        HumanMessage(content="Has it grown much this week?"),
        AIMessage(content="It's sprouted two new leaves since Monday."),
        HumanMessage(content="Are the leaves still turning yellow on the edges?"),
        AIMessage(content="A little, but it's looking healthier overall."),
        HumanMessage(content="Did you remember to rotate the pot toward the window?"),
        AIMessage(content="I rotated it a quarter turn so it gets more even light."),
        HumanMessage(content="How often should we be fertilizing this plant?"),
        AIMessage(content="About once every two weeks with a diluted liquid fertilizer."),
        HumanMessage(content="When should we expect to have to replace the pot?"),
    ],
})

# Print only the text of the last message in the returned state.
print(response["messages"][-1].content)

Based on its current growth rate, we'll probably need to consider repotting it sometime next spring.

We'll know it's definitely time when we see roots starting to come out of the drainage holes, or if it looks like the plant is getting too top-heavy for its current pot. I'll keep an eye on it!


In [15]:
# Report which model produced the final reply of the previous invocation.
final_message = response["messages"][-1]
print(final_message.response_metadata["model_name"])

gemini-2.5-flash
