In [44]:
import os
import sys
from glob import glob
import pandas as pd
from openai_client import client, call_openai_api
from load_configs import openai_api_key

# RunnablePassthrough

In [7]:
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

runnable = RunnableParallel(
    origin=RunnablePassthrough(),
    modified=lambda x: x+1
)

runnable.invoke(1) # {'origin': 1, 'modified': 2}


def fake_llm(prompt: str) -> str:
    """Stand-in for a real LLM call: ignores ``prompt`` and returns a fixed reply."""
    canned_reply = "completion"
    return canned_reply

chain = RunnableLambda(fake_llm) | {
    'original': RunnablePassthrough(), # Original LLM output
    'parsed': lambda text: text[::-1] # Parsing logic
}

chain.invoke('hello') # {'original': 'completion', 'parsed': 'noitelpmoc'}

{'original': 'completion', 'parsed': 'noitelpmoc'}

In [8]:
def fake_llm(prompt: str) -> str:
    """Fake LLM for the example.

    The prompt is accepted only so the function has an LLM-like signature;
    every call yields the same literal string.
    """
    reply = "completion"
    return reply

runnable = {
    'llm1':  fake_llm,
    'llm2':  fake_llm,
} | RunnablePassthrough.assign(
    total_chars=lambda inputs: len(inputs['llm1'] + inputs['llm2'])
)

runnable.invoke('hello')
# {'llm1': 'completion', 'llm2': 'completion', 'total_chars': 20}

{'llm1': 'completion', 'llm2': 'completion', 'total_chars': 20}

# CSV agent experiments

In [24]:
# Directory holding the CSV fixtures, relative to the notebook's working directory.
data_path = "../data"

# Fail here with a clear message rather than globbing an empty list and hitting
# an IndexError in a later cell (the bare os.path.exists() call discarded its result).
if not os.path.isdir(data_path):
    raise FileNotFoundError(f"data directory not found: {data_path!r}")

# NOTE: glob() does not promise any particular ordering of its matches, so the
# positions in this list should not be treated as stable identifiers.
csv_files = glob(f"{data_path}/*.csv")
print(csv_files)

['../data/country_eng.csv', '../data/custom_2020.csv', '../data/Mobile-Price-Prediction-cleaned_data.csv']


In [40]:
# Select the small file by name: glob() returns matches in arbitrary order, so
# csv_files[0] is not guaranteed to be country_eng.csv on every machine or run.
sfile = os.path.join(data_path, "country_eng.csv")
df_small = pd.read_csv(sfile)
df_small.count()

Unnamed: 0      232
Country         232
Country_name    232
Area            232
dtype: int64

In [41]:
# Select the large file by name: glob() returns matches in arbitrary order, so
# csv_files[1] is not guaranteed to be custom_2020.csv on every machine or run.
lfile = os.path.join(data_path, "custom_2020.csv")
df_large = pd.read_csv(lfile)
df_large.count()

exp_imp    3299804
Year       3299804
month      3299804
ym         3299804
Country    3299804
Custom     3299804
hs2        3299804
hs4        3299804
hs6        3299804
hs9        3299804
Q1         3299804
Q2         3299804
Value      3299804
dtype: int64

In [46]:
from langchain_openai import ChatOpenAI, OpenAI
llm = ChatOpenAI(
    api_key=openai_api_key,
    model="gpt-4o-mini-2024-07-18",
    # model="gpt-4o-mini-2024-07-18",
    # model="gpt-4o"
    )

In [39]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_csv_agent


In [52]:
sfile

'../data/country_eng.csv'

In [53]:
# Function-calling agent over the small CSV. With allow_dangerous_code=True the
# agent runs model-generated Python (python_repl_ast) in this kernel, so only
# point it at data and prompts you trust.
agent = create_csv_agent(
    llm,
    sfile,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    allow_dangerous_code=True,
    verbose=True
)
# Chain.run() is deprecated; invoke() takes the executor's single 'input' key and
# returns a dict whose 'output' entry is the string run() used to return.
agent.invoke({"input": "how many rows are there?"})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'len(df)'}`


[0m[36;1m[1;3m232[0m[32;1m[1;3mThere are 232 rows in the dataframe.[0m

[1m> Finished chain.[0m


'There are 232 rows in the dataframe.'

In [54]:
# Same question through the text-based ReAct agent. In the recorded run the chat
# model wrote a free-text sentence after "Action:" instead of the tool name, so
# every step was rejected as "not a valid tool" until the iteration limit was hit.
agent = create_csv_agent(
    llm,
    sfile,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    allow_dangerous_code=True,
    verbose=True
)
# Chain.run() is deprecated; invoke() takes the executor's single 'input' key and
# returns a dict whose 'output' entry is the string run() used to return.
agent.invoke({"input": "how many rows are there?"})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine the number of rows in the dataframe `df`, I can use the shape attribute, which returns a tuple representing the dimensions of the dataframe. The first element of this tuple indicates the number of rows. 

Action: I will check the shape of the dataframe.
Action Input: `df.shape`[0mI will check the shape of the dataframe. is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mThought: To determine the number of rows in the dataframe `df`, I should use the shape attribute. However, I need to execute this command in the Python shell provided.

Action: I will check the shape of the dataframe using the appropriate tool.
Action Input: `df.shape`[0mI will check the shape of the dataframe using the appropriate tool. is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mTo determine the number of rows in the dataframe `df`, I need to execute the command to get its shape using the correct tool.



'Agent stopped due to iteration limit or time limit.'

In [88]:
# Tool-calling variant of the CSV agent (alternative to AgentType.OPENAI_FUNCTIONS).
agent = create_csv_agent(
    llm,
    sfile,
    agent_type="tool-calling",
    allow_dangerous_code=True,
    verbose=True,
)
# Chain.run() is deprecated; invoke() takes the executor's single 'input' key and
# returns a dict whose 'output' entry is the string run() used to return.
res = agent.invoke({"input": "how many rows are there?"})["output"]
print(res)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'len(df)'}`


[0m[36;1m[1;3m232[0m[32;1m[1;3mThere are 232 rows in the dataframe.[0m

[1m> Finished chain.[0m
There are 232 rows in the dataframe.


'There are 232 rows in the dataframe.'

In [90]:
# agent.get_allowed_tools()
agent.OutputType

typing.Dict[str, typing.Any]

In [91]:
agent.input_keys

['input']

In [92]:
agent.output_keys

['output']

In [111]:
# agent.return_values

In [104]:
agent.tools[0].return_direct = True
agent.tools[0].response_format='content'#'content_and_artifact'

In [105]:
# invoke() replaces the deprecated Chain.run(); 'output' is the executor's only output key.
res = agent.invoke({"input": "how many rows are there?"})["output"]
print(res)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'len(df)'}`


[0m[36;1m[1;3m232[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m
232


In [107]:
# invoke() replaces the deprecated Chain.run(); 'output' is the executor's only output key.
res = agent.invoke({"input": "list all the countries"})["output"]
print(res)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df['Country_name'].tolist()"}`


[0m[36;1m[1;3m['Korea', 'North_Korea', 'China', 'Taiwan', 'Mongolia', 'Hong_Kong', 'Viet_Nam', 'Thailand', 'Singapore', 'Malaysia', 'Brunei', 'Philippines', 'Indonesia', 'Cambodia', 'Lao', 'Myanmar', 'India', 'Pakistan', 'Sri_Lanka', 'Maldives', 'Bangladesh', 'Timor-Leste', 'Macao', 'Afghanistan', 'Nepal', 'Bhutan', 'Iran', 'Iraq', 'Bahrain', 'Saudi_Arabia', 'Kuwait', 'Qatar', 'Oman', 'Israel', 'Jordan', 'Syria', 'Lebanon', 'United_Arab_Emirates', 'Yemen', 'Azerbaijan', 'Armenia', 'Uzbekistan', 'Kazakhstan', 'Kyrgyz', 'Tajikistan', 'Turkmenistan', 'Georgia', 'West_Bank_and_Gaza_Strip', 'Iceland', 'Norway', 'Sweden', 'Denmark', 'United_Kingdom', 'Ireland', 'Netherlands', 'Belgium', 'Luxembourg', 'France', 'Monaco', 'Andorra', 'Germany', 'Switzerland', 'Azores_(Portugal)', 'Portugal', 'Spain', 'Gibraltar_(UK)', 'Italy', 'Malta', 'Finland', 'Poland

In [109]:
# invoke() replaces the deprecated Chain.run(); 'output' is the executor's only output key.
res = agent.invoke({"input": " give me the 10 country name and the area both"})["output"]
print(res)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df[['Country_name', 'Area']].head(10)"}`


[0m[36;1m[1;3m  Country_name  Area
0        Korea  Asia
1  North_Korea  Asia
2        China  Asia
3       Taiwan  Asia
4     Mongolia  Asia
5    Hong_Kong  Asia
6     Viet_Nam  Asia
7     Thailand  Asia
8    Singapore  Asia
9     Malaysia  Asia[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m
  Country_name  Area
0        Korea  Asia
1  North_Korea  Asia
2        China  Asia
3       Taiwan  Asia
4     Mongolia  Asia
5    Hong_Kong  Asia
6     Viet_Nam  Asia
7     Thailand  Asia
8    Singapore  Asia
9     Malaysia  Asia


In [112]:
# invoke() replaces the deprecated Chain.run(); 'output' is the executor's only output key.
res = agent.invoke({"input": " give me the 10 country name first and then give me the 10 area"})["output"]
print(res)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df['Country_name'].head(10).tolist()"}`


[0m[36;1m[1;3m['Korea', 'North_Korea', 'China', 'Taiwan', 'Mongolia', 'Hong_Kong', 'Viet_Nam', 'Thailand', 'Singapore', 'Malaysia'][0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df['Area'].head(10).tolist()"}`


[0m[36;1m[1;3m['Asia', 'Asia', 'Asia', 'Asia', 'Asia', 'Asia', 'Asia', 'Asia', 'Asia', 'Asia'][0m
[32;1m[1;3mThe first 10 country names are:
1. Korea
2. North Korea
3. China
4. Taiwan
5. Mongolia
6. Hong Kong
7. Viet Nam
8. Thailand
9. Singapore
10. Malaysia

The first 10 areas are:
1. Asia
2. Asia
3. Asia
4. Asia
5. Asia
6. Asia
7. Asia
8. Asia
9. Asia
10. Asia[0m

[1m> Finished chain.[0m
The first 10 country names are:
1. Korea
2. North Korea
3. China
4. Taiwan
5. Mongolia
6. Hong Kong
7. Viet Nam
8. Thailand
9. Singapore
10. Malaysia

The first 10 areas are:
1. Asia
2. Asia
3. Asia
4. Asia
5. Asia
6.

In [122]:
class MyTest:
    """Minimal callable object: calling an instance delegates to ``_call``."""

    def _call(self):
        """Print the fixed marker message."""
        message = 'I am called'
        print(message)

    def __call__(self):
        """Make instances callable; the result of ``_call`` is not returned."""
        self._call()

# Create an instance
mt = MyTest()

# Call the instance
mt()  # This will print 'I am called'


I am called


In [102]:
agent.dict()

{'name': None,
 'memory': None,
 'verbose': True,
 'tags': None,
 'metadata': None,
 'agent': {'runnable': {'name': None,
   'first': {'name': None,
    'mapper': {'name': None,
     'steps__': {'agent_scratchpad': RunnableLambda(lambda x: message_formatter(x['intermediate_steps']))}}},
   'middle': [{'name': None,
     'input_variables': ['agent_scratchpad', 'input'],
     'optional_variables': [],
     'output_parser': None,
     'partial_variables': {},
     'metadata': None,
     'tags': None,
     'messages': [{'content': '\nYou are working with a pandas dataframe in Python. The name of the dataframe is `df`.\nThis is the result of `print(df.head())`:\n|    |   Unnamed: 0 |   Country | Country_name   | Area   |\n|---:|-------------:|----------:|:---------------|:-------|\n|  0 |            0 |       103 | Korea          | Asia   |\n|  1 |            1 |       104 | North_Korea    | Asia   |\n|  2 |            2 |       105 | China          | Asia   |\n|  3 |            3 |       1

- _iter_next_step: Override this to take control of how the agent makes and acts on choices. Takes a single step in the thought-action-observation loop.
    - inputs: Dict[str, str] 
    - intermediate_steps: List[Tuple[AgentAction, str]]
    - intermediate_steps = self._prepare_intermediate_steps(intermediate_steps)
    - output = self.agent.plan(intermediate_steps,callbacks=run_manager.get_child() if run_manager else None,
                **inputs, )
    
- self._perform_agent_action-> AgentStep:
    - tool_run_kwargs = self.agent.tool_run_logging_kwargs()
    - if return_direct: tool_run_kwargs["llm_prefix"] = ""
    - observation = tool.run(agent_action.tool_input, **tool_run_kwargs)
    - AgentStep(action=agent_action, observation=observation)
- 


In [125]:
# https://github.com/langchain-ai/langchain/blob/02c35da445b5a8cbd3d47ba468243f9297795124/libs/core/langchain_core/tools.py#L1352
from inspect import signature
from typing import List
def render_text_description_and_args(tools) -> str:
    """Describe each tool on its own line, including its argument schema.

    Args:
        tools: Iterable of tool objects exposing ``name``, ``description`` and
            ``args``. A ``func`` attribute is optional; when present and
            truthy its call signature is appended to the tool name.

    Returns:
        The per-tool lines joined with newlines (empty string for no tools).
        Each line has the form::

            <name><signature> - <description>, args: <args>

        where ``<signature>`` is omitted for tools without a ``func``.
    """

    def _describe(tool) -> str:
        # Read the schema first, then decide whether a signature is available.
        schema_text = str(tool.args)
        fn = getattr(tool, "func", None)
        call_sig = signature(fn) if fn else ""
        return f"{tool.name}{call_sig} - {tool.description}, args: {schema_text}"

    return "\n".join(_describe(tool) for tool in tools)

https://github.com/langchain-ai/langchain/blob/02c35da445b5a8cbd3d47ba468243f9297795124/libs/langchain/langchain/agents/react/agent.py#L13
def create_react_agent(
    llm: BaseLanguageModel,
    tools: Sequence[BaseTool],
    prompt: BasePromptTemplate,
    output_parser: Optional[AgentOutputParser] = None,
    tools_renderer: ToolsRenderer = render_text_description,
    *,
    stop_sequence: Union[bool, List[str]] = True,
) -> Runnable:
    """Create an agent that uses ReAct prompting.

    Based on paper "ReAct: Synergizing Reasoning and Acting in Language Models"
    (https://arxiv.org/abs/2210.03629)

    Args:
        llm: LLM to use as the agent.
        tools: Tools this agent has access to.
        prompt: The prompt to use. See Prompt section below for more.
        output_parser: AgentOutputParser used to parse the LLM output. When
            omitted (or falsy), a ReActSingleInputOutputParser is created.
        tools_renderer: This controls how the tools are converted into a string and
            then passed into the LLM. Default is `render_text_description`.
        stop_sequence: bool or list of str.
            If True, binds a single stop sequence ("Observation" at the start of a
            new line) to the LLM to avoid hallucinated observations.
            If False, does not add a stop token.
            If a list of str, uses the provided list as the stop tokens.

            Default is True. You may want to set this to False if the LLM you are
            using does not support stop sequences.

    Returns:
        A Runnable sequence representing an agent. It takes as input all the same input
        variables as the prompt passed in does. It returns as output either an
        AgentAction or AgentFinish.

    Raises:
        ValueError: If the prompt declares none of `tools`, `tool_names` or
            `agent_scratchpad` for one or more of those names, either as input
            variables or as partial variables.

    Examples:

        .. code-block:: python

            from langchain import hub
            from langchain_community.llms import OpenAI
            from langchain.agents import AgentExecutor, create_react_agent

            prompt = hub.pull("hwchase17/react")
            model = OpenAI()
            tools = ...

            agent = create_react_agent(model, tools, prompt)
            agent_executor = AgentExecutor(agent=agent, tools=tools)

            agent_executor.invoke({"input": "hi"})

            # Use with chat history
            from langchain_core.messages import AIMessage, HumanMessage
            agent_executor.invoke(
                {
                    "input": "what's my name?",
                    # Notice that chat_history is a string
                    # since this prompt is aimed at LLMs, not chat models
                    "chat_history": "Human: My name is Bob\\nAI: Hello Bob!",
                }
            )

    Prompt:

        The prompt must have input keys:
            * `tools`: contains descriptions and arguments for each tool.
            * `tool_names`: contains all tool names.
            * `agent_scratchpad`: contains previous agent actions and tool outputs as a string.

        Here's an example:

        .. code-block:: python

            from langchain_core.prompts import PromptTemplate

            template = '''Answer the following questions as best you can. You have access to the following tools:

            {tools}

            Use the following format:

            Question: the input question you must answer
            Thought: you should always think about what to do
            Action: the action to take, should be one of [{tool_names}]
            Action Input: the input to the action
            Observation: the result of the action
            ... (this Thought/Action/Action Input/Observation can repeat N times)
            Thought: I now know the final answer
            Final Answer: the final answer to the original input question

            Begin!

            Question: {input}
            Thought:{agent_scratchpad}'''

            prompt = PromptTemplate.from_template(template)
    """  # noqa: E501
    # Validate up front that the prompt can receive everything this factory fills in;
    # a variable counts as present whether it is an input or an already-partialed one.
    missing_vars = {"tools", "tool_names", "agent_scratchpad"}.difference(
        prompt.input_variables + list(prompt.partial_variables)
    )
    if missing_vars:
        raise ValueError(f"Prompt missing required variables: {missing_vars}")

    # Pre-fill the tool catalogue and the comma-separated tool names, leaving the
    # remaining variables to be supplied at invocation time.
    prompt = prompt.partial(
        tools=tools_renderer(list(tools)),
        tool_names=", ".join([t.name for t in tools]),
    )
    if stop_sequence:
        # `True` selects the default stop sequence; any other truthy value is used as given.
        stop = ["\nObservation"] if stop_sequence is True else stop_sequence
        # bind() returns a new Runnable wrapping `llm` with `stop=stop` attached as a
        # default invocation argument, so the chain below never has to pass it explicitly.
        llm_with_stop = llm.bind(stop=stop)
    else:
        llm_with_stop = llm
    output_parser = output_parser or ReActSingleInputOutputParser()
    # Pipeline: add `agent_scratchpad` (built from the incoming `intermediate_steps`)
    # to the input mapping, render the prompt, call the LLM, then parse its output.
    agent = (
        RunnablePassthrough.assign(
            agent_scratchpad=lambda x: format_log_to_str(x["intermediate_steps"]),
        )
        | prompt
        | llm_with_stop
        | output_parser
    )
    return agent
    
SINCE THE LLM IS A RUNNABLE, WE FIRST NEED TO UNDERSTAND RUNNABLE AND ITS BIND OPERATION

https://github.com/langchain-ai/langchain/blob/d77c7c4236df8e56fbe3acc8e0a71b57b48f1678/libs/core/langchain_core/runnables/base.py#L1335



Runnable(Generic[Input, Output], ABC):
    """A unit of work that can be invoked, batched, streamed, transformed and composed.

    

In [141]:
import langchain_core


<module 'langchain_core' from '/mnt/d/myDev/llmapps/venv/lib/python3.9/site-packages/langchain_core/__init__.py'>

In [166]:
from langchain_core.runnables.base import Runnable
from typing import Any, Dict, Optional, Type

class TextTransformationRunnable(Runnable[str, str]):
    """
    A Runnable that transforms input text by appending a specified suffix.

    ``invoke`` and ``batch`` accept extra keyword arguments so the instance keeps
    working when it is wrapped with ``.bind(...)`` (bound arguments are forwarded
    as keyword arguments) and when callers pass ``return_exceptions`` to ``batch``.
    """
    def __init__(self, suffix: str):
        """
        Args:
            suffix: Text appended to every input.
        """
        super().__init__()
        self.suffix = suffix

    def invoke(self, input: str, config: Optional[Dict[str, Any]] = None, **kwargs: Any) -> str:
        """
        Appends the suffix to the input text.

        Args:
            input: Text to extend.
            config: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; ignored.

        Returns:
            ``input`` followed by the configured suffix.
        """
        return input + self.suffix

    def batch(
        self,
        inputs: list[str],
        config: Optional[Dict[str, Any]] = None,
        *,
        return_exceptions: bool = False,
        **kwargs: Any,
    ) -> list[Any]:
        """
        Processes a batch of inputs by appending the suffix to each input text.

        Args:
            inputs: Texts to extend, processed sequentially in order.
            config: Passed unchanged to every ``invoke`` call.
            return_exceptions: When True, an exception raised for one input is
                placed in the result list at that input's position instead of
                aborting the whole batch.
            **kwargs: Forwarded to every ``invoke`` call.

        Returns:
            One result per input, in input order (empty list for empty input).

        Raises:
            Exception: Whatever ``invoke`` raises, unless ``return_exceptions`` is True.
        """
        results: list[Any] = []
        for input_text in inputs:
            try:
                results.append(self.invoke(input_text, config, **kwargs))
            except Exception as exc:
                if not return_exceptions:
                    raise
                results.append(exc)
        return results

# Usage example
suffix_adder = TextTransformationRunnable(" world!")

# Single invocation
result = suffix_adder.invoke("Hello")
print(result)  # Outputs: Hello world!

# Batch invocation
batch_result = suffix_adder.batch(["Hello", "Goodbye"])
print(batch_result)  # Outputs: ['Hello world!', 'Goodbye world!']


Hello world!
['Hello world!', 'Goodbye world!']


The bind() method in LangChain serves a few key purposes:
• It allows you to bind arguments to a Runnable, returning a new Runnable. This is useful when a Runnable in a chain requires an argument that is not in the output of the previous Runnable or included in the user input.
• It can be used to bind model-specific tools or default invocation arguments to a Runnable. This allows you to configure the Runnable with certain parameters that will be used each time it is invoked, without having to pass those parameters in manually. [2,3]
• When working with chat models, the bind_tools() method can be used to handle converting custom tool schemas (like the ones used by OpenAI) and binding them to the model, so the model can call those tools as part of its response. [4,5]
In summary, the bind() method in LangChain provides a way to configure a Runnable with additional parameters or functionality, without having to pass those in manually each time the Runnable is used. This helps make Runnables more reusable and composable within larger chains or workflows.

In [169]:
from typing import Any, Dict, Optional, Type
from langchain_core.runnables import Runnable

class TextProcessorRunnable(Runnable[str, str]):
    """Runnable that returns its input either unchanged or upper-cased.

    The ``to_uppercase`` flag given at construction is the default; a
    ``to_uppercase`` keyword passed to ``invoke`` takes precedence for that call.
    """

    def __init__(self, to_uppercase: bool = False) -> None:
        self.to_uppercase = to_uppercase

    @property
    def InputType(self) -> Type[str]:
        """This runnable consumes ``str``."""
        return str

    @property
    def OutputType(self) -> Type[str]:
        """This runnable produces ``str``."""
        return str

    def invoke(self, input: str, config: Optional[Dict[str, Any]] = None, **kwargs: Any) -> str:
        """Return ``input``, upper-cased when the effective flag is truthy.

        ``config`` is accepted but not used.
        """
        # A per-call keyword wins over the value stored on the instance.
        if 'to_uppercase' in kwargs:
            shout = kwargs['to_uppercase']
        else:
            shout = self.to_uppercase

        return input.upper() if shout else input


In [170]:
# Create an instance of TextProcessorRunnable without binding
processor = TextProcessorRunnable()

# Invoke the processor without binding any arguments
output = processor.invoke("Hello World")
print(output)  # Output: "Hello World"

# Now, use the bind method to bind the 'to_uppercase' argument
bound_processor = processor.bind(to_uppercase=True)

# Invoke the bound processor
bound_output = bound_processor.invoke("Hello World")
print(bound_output)  # Output: "HELLO WORLD"


Hello World
HELLO WORLD
