In [1]:
import os
from typing import Callable, Literal

from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.agents.middleware import ModelRequest, ModelResponse, wrap_model_call
from langchain.chat_models import init_chat_model
from langchain.messages import SystemMessage
from pydantic import BaseModel, Field




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment.
# load_dotenv() writes them into os.environ itself, so no re-assignment is needed.
load_dotenv()

# Fail early with a readable message. The previous
# `os.environ[...] = os.getenv(...)` raised an opaque
# "TypeError: str expected, not NoneType" when the key was missing.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [3]:
# Small, low-cost model with temperature 0. It serves as the agent's default
# model, as the SIMPLE/COMPLEX judge, and as the handler for SIMPLE requests.
llm = init_chat_model(model="gpt-4o-mini", model_provider="openai", temperature=0)

# Larger model that COMPLEX requests are routed to; sampled at temperature 1.2.
complexllm = init_chat_model(model="gpt-4o", model_provider="openai", temperature=1.2)



In [4]:
# Structured-output schema for the routing judge: a single SIMPLE/COMPLEX verdict.
# NOTE(review): pydantic normally publishes the class docstring and Field
# descriptions in the generated JSON schema, so they presumably reach the LLM
# as prompt text. They are kept verbatim here; confirm before rewording them.
class Judgement(BaseModel):
    """  
    Class is created to get judgement in structured way on user query from LLM 
    """

    # The verdict. The Literal type restricts it to exactly these two uppercase strings.
    decision: Literal["SIMPLE","COMPLEX"] = Field(description="Field will have value either Simple or Complex.")

In [None]:
@wrap_model_call
def modelcall(request: ModelRequest, handler: Callable) -> ModelResponse:
    """Route a model call to the cheap or the expensive model.

    The small model (``llm``) first classifies the latest message as SIMPLE or
    COMPLEX using structured output. COMPLEX requests are forwarded to
    ``complexllm``; everything else is forwarded to ``llm``.

    Args:
        request: The pending model request. Only its last message is judged.
        handler: Callback that executes the (possibly re-routed) request.

    Returns:
        The response produced by ``handler`` for the routed request.
    """

    if not request.messages:
        # Nothing to classify; let the agent's configured model handle it.
        return handler(request)

    msg = request.messages[-1].content

    prompt = f""" 
                You are a Smart Judge which analyze the query received from User. Decide if Query is Simple of Complex.
                If you found Query is simple Query return SIMPLE
                If you found Query is Complex Return COMPLEX

                Query: {msg}

                Example:
                What is Power BI?
                Output: SIMPLE
            """

    # Constrain the judge's answer to the Judgement schema so that the decision
    # can be compared directly against the literal values.
    judge = llm.with_structured_output(Judgement)
    judgement = judge.invoke([SystemMessage(content=prompt)])
    print(judgement.decision)

    chosen_model = complexllm if judgement.decision == "COMPLEX" else llm

    # ModelRequest.override() returns a NEW request and leaves the original
    # untouched. The returned request must be the one handed to the handler;
    # discarding it (as before) meant the routing decision never took effect.
    return handler(request.override(model=chosen_model))

In [6]:
# Build the agent with the small model as its default and register the
# `modelcall` routing middleware defined above.
reactAgent = create_agent(model=llm, middleware=[modelcall])

In [7]:
# Send a single user question through the agent.
result = reactAgent.invoke({"messages": "What is LLM ? "})

# Show the metadata of the final message; its 'model_name' entry reveals
# which model actually produced the answer.
result["messages"][-1].response_metadata

SIMPLE


{'token_usage': {'completion_tokens': 284,
  'prompt_tokens': 13,
  'total_tokens': 297,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'model_provider': 'openai',
 'model_name': 'gpt-4o-mini-2024-07-18',
 'system_fingerprint': 'fp_c4585b5b9c',
 'id': 'chatcmpl-CoxaIAFxNo0b1gt3ZZxe5WLxMh3Db',
 'service_tier': 'default',
 'finish_reason': 'stop',
 'logprobs': None}

In [8]:
# Print every message of the run, in order, using each message's own formatter.
for message in result["messages"]:
    message.pretty_print()


What is LLM ? 

LLM stands for "Large Language Model." It refers to a type of artificial intelligence model that is designed to understand and generate human language. These models are typically trained on vast amounts of text data and use deep learning techniques, particularly neural networks, to learn patterns, grammar, facts, and even some reasoning abilities from the data.

Key characteristics of LLMs include:

1. **Scale**: They are called "large" because they often have billions or even trillions of parameters, which are the weights and biases that the model learns during training.

2. **Versatility**: LLMs can perform a wide range of language-related tasks, such as text generation, translation, summarization, question answering, and more.

3. **Contextual Understanding**: They can generate coherent and contextually relevant text based on the input they receive, making them useful for applications like chatbots, content creation, and more.

4. **Pre-training and Fine-tuning**: L