# v1

In [50]:
# open yaml
import yaml
import os

def load_yaml(file_path):
    if not os.path.exists(file_path):
        print(f"Error: The file {file_path} does not exist.")
    try:
        with open(file_path, 'r') as file:
            data = yaml.safe_load(file)
            return data
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file: {e}")

In [51]:
data = load_yaml('agents_behaviour.yaml')
# data

In [52]:
# data[0]

In [53]:
# extract the agent's name and description
def extract_agent_info(agent):
    name = agent.get('name', 'Unknown')
    description = agent.get('description', 'No description provided')
    return name, description

# Extract information for each agent
agent_info = []
for agent in data:
    name, description = extract_agent_info(agent)
    agent_info.append({'name': name, 'description': description})
# Display the extracted information
# for info in agent_info:
#     print(f"Agent Name: {info['name']}")
#     print(f"Description: {info['description']}\n")

In [54]:

tools_details = []

# list all the tools used by the agents
def extract_tools(agent):
    return agent.get('tools', [])

# Extract tools for each agent
for agent in data:
    tools = extract_tools(agent)
    # print(tools)
    for tool in tools:
        # print(tool['name'], tool['description'])
        tools_details.append({'name': tool['name'], 'description': tool['description']})

# for tool in tools_details:
#     print(f"Tool Name: {tool['name']}")
#     print(f"Description: {tool['description']}\n")

In [55]:
from apicall import get_embedding

In [56]:
agent_embeddings = []
# Generate embeddings for each agent's description
for agent in agent_info:
    embedding = get_embedding(f"Name:{agent['name']}.\n Description: {agent['description']}")
    agent_embeddings.append({'name': agent['name'], 'embedding': embedding})

In [57]:
# agent_embeddings

In [58]:
from tools import CosineSimilarityCalculator
query = "A fat man on the rat, emojify this sentence"

query_embedding = get_embedding(query)
# Find the most relevant agent based on the query embedding and return the agent name and simillarity
def find_relevant_agent(query_embedding, agent_embeddings):
    sim_score = []
    for agent in agent_embeddings:
        embedding_agent = agent['embedding']
        # print(embedding_agent.shape, query_embedding.shape)
        similarity = CosineSimilarityCalculator.calculate_similarity(embedding1 = query_embedding, embedding2 = embedding_agent)
        sim_score.append({'name': agent['name'], 'similarity': similarity})
    # Sort agents by similarity score in descending order
    sim_score.sort(key=lambda x: x['similarity'], reverse=True)
    return sim_score
relevant_agent = find_relevant_agent(query_embedding, agent_embeddings)
# print(f"Most relevant agent for the query '{query}':")
relevant_agent

[{'name': 'EmojiWriter', 'similarity': np.float64(0.2870189736889936)},
 {'name': 'TranslatorAgent', 'similarity': np.float64(0.1250715121941039)},
 {'name': 'TextSummarizer', 'similarity': np.float64(0.10902902061560518)},
 {'name': 'CSVDataAgent', 'similarity': np.float64(0.0632980593235556)},
 {'name': 'PDFContentWriter', 'similarity': np.float64(0.05371273767444446)}]

In [59]:
# relevant_agent[0]['name']

In [60]:
# list the tools for a agent
def list_tools_for_agent(agent_name, data):
    for agent in data:
        if agent['name'] == agent_name:
            return agent.get('tools', [])
    return []
# Get tools for the most relevant agent
tools_for_relevant_agent = list_tools_for_agent(relevant_agent[0]['name'], data)
# Display the tools for the most relevant agent
print(f"Tools for the most relevant agent '{relevant_agent[0]['name']}':")
for tool in tools_for_relevant_agent:
    print(f"Tool Name: {tool['name']}")
    print(f"Description: {tool['description']}\n")

Tools for the most relevant agent 'EmojiWriter':
Tool Name: EmojiTranslator
Description: Translates words and concepts to relevant emojis

Tool Name: EmojiMixer
Description: Creates custom emoji combinations for content



In [61]:
from apicall import get_reply

message = [
    {
        "role": "system",
        "content": f"You have to find the best sequence for list of tool to complete the task. List of tools are {tools_for_relevant_agent}"
    },
    {
        "role": "user",
        "content": "I want to extract the text from pdf and refactor the text"
    }
]

from pydantic import BaseModel

class ToolsSeqFinder(BaseModel):
    tools_name_in_seq: list[str]


tools_order = get_reply(message,ToolsSeqFinder )
tools_order


ToolsSeqFinder(tools_name_in_seq=['EmojiTranslator', 'EmojiMixer'])

In [62]:
# print the input schema for the tools function
import inspect
from tools import TextExtractor, ContentReformatter, EmojiTranslator,EmojiMixer

def print_input_schema(func):
    signature = inspect.signature(func)
    # print(f"Input schema for {func.__name__}:")
    function_dict = {}
    for param in signature.parameters.values():
        # print(f"{param.name}: {param.annotation}")
        function_dict[param.name] = str(param.annotation)
    return function_dict
func_dict = print_input_schema(EmojiTranslator.translate_to_emoji)
func_dict

{'text': "<class 'str'>"}

In [None]:

# ask the user to input the tools required to complete the task
message = [
    {
        "role": "system",
        "content": f"You have to ask for the details of the tools required to complete the task. The tools required are {func_dict}"
    },
    {
        "role": "user",
        "content": "I want to extract the text from pdf and summarize the text."
    }
]

class ToolsInput(BaseModel):
    information_tillnow: str
    all_information_gathered: bool
    flow_of_question: str

history = ''

# set False for message['all_information_gathered'] to continue the flow of question
reply = {}
reply['all_information_gathered'] = False

while True:
    # Get the reply from the user
    reply = get_reply(message, ToolsInput)
    print(reply)
    # load as json
    reply = reply.dict()
    if reply['all_information_gathered']:
        break
    else:
        # Update the message with the flow of question
        history = history + reply['flow_of_question'] + '\n'
        message[0]['content'] = f"You have to ask for the details required to complete the task. The tools required are {func_dict}. History of questions: {history}"
        query = input("Please provide the details for the tool: ")
        message[1]['content'] = query

# Print the final details gathered for the tools
print("Final details gathered for the tools:")
print(f"Required Details: {reply['information_tillnow']}")
print(f"All Information Gathered: {reply['all_information_gathered']}")

/tmp/ipykernel_4469/3942844028.py:29: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  reply = reply.dict()


information_tillnow='The user needs to extract and summarize text from a PDF file.' all_information_gathered=False flow_of_question='Ask about the tools or software the user plans to use for extracting and summarizing the text from the PDF.'
information_tillnow='' all_information_gathered=False flow_of_question='What is the main topic or content of the PDF? This helps in understanding what kind of summary is expected.'


In [None]:
# convert user input to the function input
message = [
    {
        "role": "system",
        "content": f"You have to convert the user input to the function input. The function input is {func_dict}"
    },
    {
        "role": "user",
        "content": f"Convert the user input to the function input as json. The user input is {reply['information_tillnow']} return it as a json object."
    }
]

class FunctionInput(BaseModel):
    function_input: str

function_input = get_reply(message, FunctionInput)
# Print the function input
# print("Function Input:")
# print(function_input.function_input)


In [None]:
text = TextExtractor.extract_text_from_pdf(pdf_path=function_input.function_input)
# Print the extracted text
# print("Extracted Text:")
# print(text)

Successfully extracted text from resume.pdf


In [None]:
tools_order

ToolsSeqFinder(tools_name_in_seq=['TextExtractor', 'ContentReformatter'])

In [None]:
func_dict = print_input_schema(ContentReformatter.reformat_content)
func_dict

{'text': "<class 'str'>", 'format_description': "<class 'str'>"}

In [None]:

# ask the user to input the tools required to complete the task
message = [
    {
        "role": "system",
        "content": f"You have to ask for the details of the tools required to complete the task. The tools required are {func_dict}"
    },
    {
        "role": "user",
        "content": f"I want to extract the text from pdf and reformat the text. The text is already extracted. And the text is {text}"
    }
]

class ToolsInput(BaseModel):
    information_tillnow: str
    all_information_gathered: bool
    flow_of_question: str

history = ''

# set False for message['all_information_gathered'] to continue the flow of question
reply = {}
reply['all_information_gathered'] = False

while True:
    # Get the reply from the user
    reply = get_reply(message, ToolsInput)
    print(reply)
    # load as json
    reply = reply.dict()
    if reply['all_information_gathered']:
        break
    else:
        # Update the message with the flow of question
        history = history + reply['flow_of_question'] + '\n'
        message[0]['content'] = f"You have to ask for the details required to complete the task. The tools required are {func_dict}. History of questions: {history}. The text is already extracted. And the text is {text}"
        query = input("Please provide the details for the tool. The tools required are {func_dict} ")
        message[1]['content'] = query

# Print the final details gathered for the tools
# print("Final details gathered for the tools:")
# print(f"Required Details: {reply['information_tillnow']}")
# print(f"All Information Gathered: {reply['all_information_gathered']}")

/tmp/ipykernel_59356/1840275156.py:29: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  reply = reply.dict()


information_tillnow='Text has been provided and needs to be reformatted.' all_information_gathered=False flow_of_question='Do you need a specific format for the reformatted text?'
information_tillnow='The user appears to have provided a detailed text including personal information, professional summary, skills, education, work experience, publications, projects, honors and awards, and social experience.' all_information_gathered=False flow_of_question='What specific markdown format or structure do you require for presenting this information?'
information_tillnow='The user needs a markdown format for the provided text.' all_information_gathered=False flow_of_question='What specific sections or headings should the markdown document contain? Do you want any particular styling or features such as bullet points, tables, or links included?'
information_tillnow='- Extracted text contains personal information, summary, core competencies, education details, skills, experience, publications, pro

In [None]:
# convert user input to the function input
message = [
    {
        "role": "system",
        "content": f"You have to convert the user input to the function input. The function input is {func_dict} and the text is {text}"
    },
    {
        "role": "user",
        "content": f"Convert the user input to the function input as json. The user input is {reply['information_tillnow']} return it as a json object."
    }
]

class FunctionInput(BaseModel):
    function_input: str

function_input = get_reply(message, FunctionInput)
# Print the function input
print("Function Input:")
print(function_input.function_input)

Function Input:
Sumit Yadav
Email: rockerritesh4@gmail.com
Portfolio: sumityadav.com.np
Mobile:
+977-9819856148
Github:
github.com/rockerritesh
LinkedIn:
linkedin.com/in/rockerritesh
Summary
AI Engineer specializing in natural language processing (NLP) and AI optimization, with 5+ years of
experience developing production-grade AI systems. Currently architecting:
• Multi-agent RAG systems with guardrails for secure information retrieval.
• Context-aware chatbots with post-conversation analysis capabilities.
• LLM evaluation frameworks for accuracy and reliability testing.
• MCP Server for easy and fast way to integrate Agents.
Proven track record in AI/Machine Learning engineering across many NLP projects including maithili text
classification(low-resources) (0.87 accuracy) and multilingual document analysis systems. Authored 3 peer-reviewed
publications and one open review paper on machine learning optimization/security and low-resource language
processing
Core Competencies:
• AI Prom

# v2

In [14]:
import yaml
import os
import inspect
from pydantic import BaseModel
from typing import List, Dict, Any, Optional, Union
import numpy as np

# Import custom modules
from apicall import get_embedding, get_reply, Reply
from tools import (
    DataframeLoader, ContentExtractor, EmojiTranslator, EmojiMixer,
    KeypointExtractor, ContentExpander, TextExtractor, ContentReformatter,
    MultilingualTranslator, CosineSimilarityCalculator,StylePreserver
)

In [15]:
# Function to load YAML configuration file
def load_yaml(file_path):
    """Load and parse a YAML file."""
    if not os.path.exists(file_path):
        print(f"Error: The file {file_path} does not exist.")
        return None
    try:
        with open(file_path, 'r') as file:
            data = yaml.safe_load(file)
            return data
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file: {e}")
        return None

In [16]:
# Load the agent configuration from YAML
agents_config = load_yaml('agents_behaviour.yaml')

In [17]:
# Extract agent information
class AgentInfo(BaseModel):
    """Model for storing agent information."""
    name: str
    description: str
    id: str
    tools: List[Dict[str, Any]]
    parameters: Optional[Dict[str, Any]] = None
    embedding: Optional[Any] = None

def extract_agent_info(agents_data):
    """Extract detailed information for each agent."""
    agents = []
    for agent_data in agents_data:
        agent = AgentInfo(
            name=agent_data.get('name', 'Unknown'),
            description=agent_data.get('description', 'No description provided'),
            id=agent_data.get('id', 'unknown-id'),
            tools=agent_data.get('tools', []),
            parameters=agent_data.get('parameters', {})
        )
        agents.append(agent)
    return agents

# Create agent information objects
agents = extract_agent_info(agents_config)

In [18]:
agents[0]

AgentInfo(name='CSVDataAgent', description='Specialized agent for loading, processing, and extracting insights from CSV data files', id='agent-010', tools=[{'name': 'DataframeLoader', 'version': '1.2.0', 'description': 'Loads CSV files into pandas dataframes for content extraction', 'usage': 'Use this tool first when working with CSV data files', 'input_schema': {'file_path': 'str'}, 'output_schema': 'pd.DataFrame', 'capabilities': ['csv-import', 'excel-import', 'data-cleaning']}, {'name': 'ContentExtractor', 'version': '1.1.5', 'description': 'Extracts narrative content and insights from structured dataframes', 'usage': 'Use this tool after DataframeLoader to generate narratives from data', 'input_schema': {'dataframe': 'pd.DataFrame', 'columns': 'List[str]', 'sample_rows': 'int'}, 'output_schema': 'str', 'capabilities': ['data-to-text', 'insight-generation', 'story-formatting']}], parameters={'max_file_size': '50MB', 'writing_style': 'informative'}, embedding=None)

In [27]:
# Generate embeddings for each agent based on name and description
def generate_agent_embeddings(agents_list):
    """Generate embeddings for each agent based on their name and description."""
    for agent in agents_list:
        embedding = get_embedding(f"Name: {agent.name}.\nDescription: {agent.description}")
        agent.embedding = embedding
    return agents_list

# Generate embeddings for all agents
agents_with_embeddings = generate_agent_embeddings(agents)

In [28]:
agents_with_embeddings[0]

AgentInfo(name='CSVDataAgent', description='Specialized agent for loading, processing, and extracting insights from CSV data files', id='agent-010', tools=[{'name': 'DataframeLoader', 'version': '1.2.0', 'description': 'Loads CSV files into pandas dataframes for content extraction', 'usage': 'Use this tool first when working with CSV data files', 'input_schema': {'file_path': 'str'}, 'output_schema': 'pd.DataFrame', 'capabilities': ['csv-import', 'excel-import', 'data-cleaning']}, {'name': 'ContentExtractor', 'version': '1.1.5', 'description': 'Extracts narrative content and insights from structured dataframes', 'usage': 'Use this tool after DataframeLoader to generate narratives from data', 'input_schema': {'dataframe': 'pd.DataFrame', 'columns': 'List[str]', 'sample_rows': 'int'}, 'output_schema': 'str', 'capabilities': ['data-to-text', 'insight-generation', 'story-formatting']}], parameters={'max_file_size': '50MB', 'writing_style': 'informative'}, embedding=array([-0.03066171, -0.0

In [None]:
# Function to find the most relevant agent based on a query
def find_relevant_agents(query, agents_list, top_n=3):
    """
    Find the most relevant agents based on the query embedding.
    Returns the top N agents sorted by similarity score.
    """
    query_embedding = get_embedding(query)
    sim_scores = []
    
    for agent in agents_list:
        similarity = CosineSimilarityCalculator.calculate_similarity(
            embedding1=query_embedding, 
            embedding2=agent.embedding
        )
        sim_scores.append({
            'agent': agent,
            'similarity': similarity
        })
    
    # Sort agents by similarity score in descending order
    sim_scores.sort(key=lambda x: x['similarity'], reverse=True)
    
    # if greates similarity is less than 0.3, return empty list
    if sim_scores[0]['similarity'] < 0.3:
        class QA(BaseModel):
            """Model for storing query and answer."""
            answer: str
        message = [
           
            {
                "role": "user",
                "content": f"Query: {query}. No relevant agent found."
            }
        ]
        reply = get_reply(message, QA)
        print(reply.answer)
        return []

    return sim_scores[:top_n]

In [29]:
query = "I want to extract the text from a PDF and summarize it."

agents_with_embeddings = find_relevant_agents(query, agents_with_embeddings)

In [30]:
agents_with_embeddings

[{'agent': AgentInfo(name='TextSummarizer', description='Specialized agent for extracting key points from text and expanding summaries into full content', id='agent-012', tools=[{'name': 'KeypointExtractor', 'version': '1.3.0', 'description': 'Identifies key points and main ideas from text content', 'usage': 'Use this tool to extract key points from lengthy text', 'input_schema': {'text': 'str'}, 'output_schema': 'List[str]', 'capabilities': ['topic-identification', 'relevance-scoring', 'bullet-generation']}, {'name': 'ContentExpander', 'version': '1.1.1', 'description': 'Expands bullet points or brief notes into full paragraphs', 'usage': 'Use this tool after KeypointExtractor to develop full content', 'input_schema': {'points': 'Union[List[str], str]'}, 'output_schema': 'str', 'capabilities': ['detail-addition', 'narrative-flow', 'tone-consistency']}], parameters={'content_type': 'article', 'tone': 'professional'}, embedding=array([-0.01680393, -0.01246005, -0.01141695, ..., -0.00973

In [11]:
# Function to get input schema for a tool function
def get_input_schema(func):
    """Get the input schema for a function based on its signature."""
    signature = inspect.signature(func)
    schema = {}
    for param in signature.parameters.values():
        schema[param.name] = str(param.annotation)
    return schema

get_input_schema(find_relevant_agents)

{'query': "<class 'inspect._empty'>",
 'agents_list': "<class 'inspect._empty'>",
 'top_n': "<class 'inspect._empty'>"}

In [12]:
# Updated function to map tool names to actual functions
def get_tool_function(tool_name):
    """Map a tool name to its actual function implementation."""  
    # Map tools to their wrapped implementations
    tool_map = {
        "DataframeLoader": DataframeLoader.load_csv,
        "ContentExtractor": ContentExtractor.extract_narrative,
        "EmojiTranslator": EmojiTranslator.translate_to_emoji,
        "EmojiMixer": EmojiMixer.create_emoji_mix,
        "KeypointExtractor": KeypointExtractor.extract_keypoints,
        "ContentExpander": ContentExpander().expand_content,
        "TextExtractor": TextExtractor.extract_text_from_pdf,
        "ContentReformatter": ContentReformatter.reformat_content,
        "MultilingualTranslator": MultilingualTranslator().translate,
        "StylePreserver": StylePreserver().preserve_style,
    }
    
    return tool_map.get(tool_name)

# Example usage of the tool function mapping
tool_name = "EmojiWriter"
get_tool_function(tool_name)


In [13]:
# Class for handling tool sequence generation
class ToolsSeqFinder(BaseModel):
    """Model for finding the best sequence of tools to complete a task."""
    tools_name_in_seq: List[str]

# Function to determine the best tool sequence for a task
# Function to determine the best tool sequence for a task
def determine_tool_sequence(agent, query):
    """Determine the best sequence of tools to use for completing a task."""
    # agent = agent['agent']
    
    message = [
        {
            "role": "system",
            "content": f"You have to find the best sequence for list of tools to complete the task. Available tools: {agent}"
        },
        {
            "role": "user",
            "content": query
        }
    ]
    
    tools_order = get_reply(message, ToolsSeqFinder)
    # Access the attribute directly from the Pydantic model
    return tools_order.tools_name_in_seq

# Example usage of tool sequence determination
agent = agents_with_embeddings[0]  # Use the first agent as an example
tools_sequence = determine_tool_sequence(agent, query)
tools_sequence

['EmojiTranslator']

In [23]:
# Class for gathering information required by tools
class ToolsInput(BaseModel):
    """Model for gathering tool input information."""
    information_tillnow: str
    all_information_gathered: bool
    flow_of_question: str

# Improved function to gather input information for a tool
def gather_tool_inputs(tool_name, tool_function, context="", previous_outputs=None):
    """
    Gather inputs required for a specific tool by asking user questions.
    Returns a dictionary of inputs for the tool.
    """
    if previous_outputs is None:
        previous_outputs = {}
    
    # Get input schema for the tool function
    input_schema = get_input_schema(tool_function)
    
    history = ''
    
    # Create initial message
    message = [
        {
            "role": "system",
            "content": f"You have to ask for the details of the tools required to complete the task. The tool required is {tool_name} with inputs: {input_schema}."
        },
        {
            "role": "user",
            "content": f"I want to use the {tool_name} tool. {context}"
        }
    ]
    
    # Set initial state
    all_information_gathered = False
    
    # Interactive loop to gather all required inputs
    while not all_information_gathered:
        # Get the reply with questions
        reply = get_reply(message, ToolsInput)
        
        # Use all_information_gathered attribute directly from the Pydantic model
        all_information_gathered = reply.all_information_gathered
        
        if all_information_gathered:
            break
        else:
            # Update the message with the flow of question
            history = history + f"Context:{context}" + reply.flow_of_question + '\n'
            message[0]['content'] = f"You have to ask for the details required to complete the task. The tool required is {tool_name} with inputs: {input_schema}. History of questions: {history}"
            
            # Get input from user
            query = input(f"[Tool: {tool_name}] {reply.flow_of_question} ")
            message[1]['content'] = query + f"History of questions: {history}"

            # append the user query to the history
            history += f"User input: {query}\n"
    
    # Debug print
    print(f"Information gathered: {reply.information_tillnow}")
    
    
    # Convert user input to function input format
    message = [
        {
            "role": "system",
            "content": f"Convert the user's input into a valid JSON object that matches this function schema: {input_schema}. Return ONLY the JSON object and nothing else."
        },
        {
            "role": "user",
            "content": f"User's input: {reply.information_tillnow}. Create a JSON object that matches the function input schema."
        }
    ]
    
    class FunctionInput(BaseModel):
        function_input: str
    
    function_input = get_reply(message, FunctionInput)
    
    return function_input.function_input
    
# Example usage of gathering tool inputs
tool_name = "StylePreserver"
tool_function = get_tool_function(tool_name)
context = "hi"
tool_inputs = gather_tool_inputs(tool_name, tool_function, context)
print(f"Gathered inputs for {tool_name}: {tool_inputs}")

Information gathered: 1. User requested key points to be extracted from a text about life and structured appropriately.
2. User suggested a '--' transformation on all words in a text, similar to '--R--A--M--'.
3. User's example specified 'ORIGINAL_TEXT' as 'ram' and 'TRANSFORMED_TEXT' as '--r--a--m--'.
4. User confirmed wanting bullet points for keypoints structuring.
Gathered inputs for StylePreserver: {"original_text": "ram", "transformed_text": "--r--a--m--"}


In [29]:
# Improved function to execute a tool with given inputs
def execute_tool(tool_name, inputs):
    """Execute a tool with the given inputs and return the output."""
    # tool_function = get_tool_function(tool_name)
    print(f"Tool function for {tool_name}: {tool_function}")
    if not tool_function:
        return f"Error: Tool '{tool_name}' not found."
    
    # Debug information
    print(f"Executing {tool_name} with inputs: {inputs}")
    
    try:
        # Check if inputs is a string and convert to dict if necessary
        if isinstance(inputs, str):
            inputs = yaml.safe_load(inputs)
        elif not isinstance(inputs, dict):
            raise ValueError("Inputs must be a dictionary or a YAML string.")
        # Call the tool function with the provided inputs
        output = tool_function(**inputs)
        return output
    except Exception as e:
        return f"Error executing tool '{tool_name}': {str(e)}"
    
# Example usage of executing a tool
tool_name = "MultilingualTranslator"
inputs = {
    "text": "I am sumit yadav",
    "target_language": "nepali"
}
output = execute_tool(tool_name, inputs)

Tool function for MultilingualTranslator: <function StylePreserver.preserve_style at 0x7cebc1b60b80>
Executing MultilingualTranslator with inputs: {'text': 'I am sumit yadav', 'target_language': 'nepali'}
