In [None]:
from langchain_together import ChatTogether
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langchain.agents import initialize_agent, Tool
from langchain.agents.agent_types import AgentType
from langchain_mcp_adapters.tools import to_fastmcp
from langchain.chat_models import ChatOpenAI
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated,List
from langchain_together import ChatTogether
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, AIMessage
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
import operator

In [None]:
class AgentState(TypedDict):
    """State dictionary handed to the nodes of the agent graph.

    The only key is ``messages``. Both node functions in this notebook
    (``Agent.call_llm`` and ``Agent.take_action``) return
    ``{'messages': [...]}``, i.e. a list of new messages rather than the
    full history.
    """

    # The annotation pairs the list type with ``operator.add`` as metadata.
    # NOTE(review): LangGraph is expected to use that metadata as the reducer
    # that concatenates node output onto the stored list — confirm in the
    # LangGraph state documentation.
    messages: Annotated[list[AnyMessage], operator.add] 

In [None]:
# Checkpointer instance; it is passed to Agent(...) further down as
# `checkpointer=memory`.
# NOTE(review): MemorySaver presumably keeps checkpoints in process memory
# only, so conversation history would not survive a kernel restart — confirm.
memory = MemorySaver()
# Sanity check: show which checkpointer class is actually in use.
print(f"Checkpointer type: {type(memory)}")

In [None]:
from langchain_mcp_adapters.client import MultiServerMCPClient
from langgraph.graph import StateGraph, MessagesState, START
from langgraph.prebuilt import ToolNode, tools_condition
from langchain.chat_models import init_chat_model


import os

# Directory of the MCP server project that `uv` should run `main.py` from.
# It can be overridden with the MCP_SERVER_DIR environment variable so the
# notebook is not tied to one machine; the default is the original location.
MCP_SERVER_DIR = os.environ.get(
    "MCP_SERVER_DIR",
    "/home/sesi/testing/agents/ER/agentic-ai/experiments/HeteroLLMs/mcp_server/",
)

# Single MCP server entry, registered under the key "csv_r". The entry
# describes the command line `uv --directory <MCP_SERVER_DIR> run main.py`
# and selects the "stdio" transport.
client = MultiServerMCPClient(
    {
        "csv_r": {
            "command": "uv",
            "args": [
                "--directory",
                MCP_SERVER_DIR,
                "run",
                "main.py",
            ],
            "transport": "stdio",
        }
    }
)

In [None]:
class Agent:
    """Two-node LangGraph agent: a model node and a tool-execution node.

    The graph starts at ``"llm"``. After every model call ``exists_action``
    chooses the next step: ``"action"`` when the last message carries tool
    calls, otherwise ``END``. The ``"action"`` node always hands control back
    to ``"llm"``.

    ``take_action`` is a coroutine; this notebook drives the compiled graph
    through ``agent.graph.ainvoke``.
    """

    def __init__(self, model, tools, checkpointer, system=""):
        """Bind the tools to the model and compile the graph.

        Args:
            model: Chat model object providing ``bind_tools`` and ``invoke``.
            tools: Tool objects providing a ``name`` attribute and ``ainvoke``.
            checkpointer: Passed unchanged to ``StateGraph.compile``.
            system: Optional system prompt; when non-empty it is prepended
                to the message list on every model call.
        """
        self.system = system
        # Name -> tool lookup used by take_action to resolve tool calls.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_llm)
        graph.add_node("action", self.take_action)
        # exists_action returns a bool; the mapping turns it into a node name.
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer=checkpointer)

    def call_llm(self, state: AgentState):
        """Invoke the tool-bound model on the current message history.

        The system prompt is added only to the list sent to the model; it is
        not part of the returned state update.

        Returns:
            ``{'messages': [message]}`` containing the model's reply.
        """
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def exists_action(self, state: AgentState):
        """Return True when the most recent message requests any tool call."""
        result = state['messages'][-1]
        return len(result.tool_calls) > 0

    async def take_action(self, state: AgentState):
        """Run every tool call requested by the most recent message.

        A tool name that is not registered in ``self.tools`` no longer raises
        ``KeyError``. For that call the ``ToolMessage`` content is
        ``"bad tool name, retry"``, so the model gets the chance to correct
        itself on the next turn instead of the whole run aborting.

        Returns:
            ``{'messages': [...]}`` with one ``ToolMessage`` per tool call,
            in the order the calls were requested.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            selected_tool = self.tools.get(t['name'])
            if selected_tool is None:
                # The model asked for a tool that was never bound.
                print(f"Unknown tool requested: {t['name']!r}")
                result = "bad tool name, retry"
            else:
                result = await selected_tool.ainvoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}

In [None]:
# Template for the agent's system prompt. "{context}" is its only placeholder.
PROMPT_TEMPLATE = """You are an AI assistant in a Multi-Agent System (MAS) framework, tasked with dynamically selecting the most suitable LLM for a given task using only the provided CSV dataset tools (Model, Parameters, Domain, Sub-domain, Accuracy, Latency_ms, Memory_mb, etc.). The user query can be in any text format. Follow this workflow:
1. Classify the query intent (e.g., list models, compare models, retrieve metrics).
2. Check available domains and subdomains from the CSV at the start.
3. Dynamically map the query to the closest domain-subdomain pair by comparing query terms to available domains/subdomains, using keyword overlap or similarity, without hardcoded mappings.
4. Use cached data to avoid redundant tool calls.
5. Evaluate metrics (e.g., accuracy for knowledge tasks) to select the best LLM at each step.
6. Normalize accuracy to 0-1 scale if >1.
7. Provide a clear explanation of the selected LLM, including metrics, why it was chosen, and the domain-subdomain mapping, referencing context if relevant.
8. If no data is found, respond: "No specific data found for this query in the CSV. Try a related domain/subdomain or check the query."

Context: {context}

Use only the CSV dataset tools. Log decisions for debugging. Select the best LLM at each step where metrics are available.
"""

# The notebook never supplies a context value, so previously the model was
# sent the raw text "Context: {context}". Fill the placeholder explicitly;
# change DEFAULT_CONTEXT to inject real context.
DEFAULT_CONTEXT = "No additional context provided."

# str.replace (not str.format) so that literal braces added to the template
# later cannot raise KeyError/ValueError.
prompt = PROMPT_TEMPLATE.replace("{context}", DEFAULT_CONTEXT)

tools = await client.get_tools()
model= ChatTogether(
    # model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
    model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    temperature=0.0
) 

agent = Agent(model, tools, checkpointer=memory, system=prompt)
thread = {"configurable": {"thread_id": "2"}} #Unique thread identifier for the conversation


In [None]:
messages = [HumanMessage(content="what models are listed in the csv file available in the tools?.")]
result = await agent.graph.ainvoke({"messages": messages}, thread)
print(result['messages'][-1].content)



In [None]:
messages = [HumanMessage(content="which LLM performs better in history( General-Knowledge domain) related queries? and why? show the metrics also only for history")]
thread = {"configurable": {"thread_id": "2"}} 
result = await agent.graph.ainvoke({"messages": messages},thread)
result['messages'][-1].content

In [None]:
messages = [HumanMessage(content="which LLM would you prefer for tasks related to astronomical geometrical distances calculations? ")]
thread = {"configurable": {"thread_id": "2"}} #existing conversation thread
result = await agent.graph.ainvoke({"messages": messages},thread)
result['messages'][-1].content


