# Understand python code agent using OpenAI

In [25]:
import dotenv

dotenv.load_dotenv()

True

In [26]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [27]:
from langchain.agents import tool
from langchain_community.document_loaders.generic import GenericLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language
from langchain_openai import OpenAIEmbeddings

@tool
def get_parsed_python_file(path: str) -> int:
    """Python Parser Tool: Returns the content of the python code file"""
    parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)
    loader = GenericLoader.from_filesystem(path, glob="**/*", suffixes=[".py"], parser=parser)
    documents = loader.load()
    return documents[0].page_content

get_parsed_python_file.invoke("../docs/sample.py")

tools = [get_parsed_python_file]

In [28]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful code documenting assistant, but don't know the user's source code",
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [29]:
from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

llm_with_tools = llm.bind_tools(tools)

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

In [30]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [31]:
list(agent_executor.stream({"input": "Write a docstring for the source code defined in the file ../docs/sample.py"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`


[0m[36;1m[1;3mimport os

def addNumbers(a, b):
    sum = a + b
    return sum

def printEnv():
    print(os.getenv("TEST"))[0m[32;1m[1;3mHere is the docstring for the source code defined in the file `../docs/sample.py`:

```python
import os

def addNumbers(a, b):
    """
    Adds two numbers and returns the sum.

    Parameters:
    - a (int): The first number.
    - b (int): The second number.

    Returns:
    int: The sum of the two numbers.
    """
    sum = a + b
    return sum

def printEnv():
    """
    Prints the value of the environment variable "TEST".
    """
    print(os.getenv("TEST"))
```

The `addNumbers` function takes two integers as input and returns their sum. The `printEnv` function prints the value of the environment variable "TEST".[0m

[1m> Finished chain.[0m


[{'actions': [OpenAIToolAgentAction(tool='get_parsed_python_file', tool_input={'path': '../docs/sample.py'}, log="\nInvoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_K8k0a1i2d6Dy8JbmK23CNd0m', 'function': {'arguments': '{\n  "path": "../docs/sample.py"\n}', 'name': 'get_parsed_python_file'}, 'type': 'function'}]})], tool_call_id='call_K8k0a1i2d6Dy8JbmK23CNd0m')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_K8k0a1i2d6Dy8JbmK23CNd0m', 'function': {'arguments': '{\n  "path": "../docs/sample.py"\n}', 'name': 'get_parsed_python_file'}, 'type': 'function'}]})]},
 {'steps': [AgentStep(action=OpenAIToolAgentAction(tool='get_parsed_python_file', tool_input={'path': '../docs/sample.py'}, log="\nInvoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`\n\n\n", message_log=[AIMessageChunk(content='', 

In [32]:
list(agent_executor.stream({"input": "Write a docs for the file ../docs/sample.py"}))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`


[0m[36;1m[1;3mimport os

def addNumbers(a, b):
    sum = a + b
    return sum

def printEnv():
    print(os.getenv("TEST"))[0m[32;1m[1;3m## sample.py

This file contains two functions:

### addNumbers(a, b)

This function takes two parameters `a` and `b` and returns their sum.

#### Parameters

- `a` (int): The first number.
- `b` (int): The second number.

#### Returns

- `sum` (int): The sum of `a` and `b`.

### printEnv()

This function prints the value of the environment variable `TEST`.

#### Parameters

None

#### Returns

None[0m

[1m> Finished chain.[0m


[{'actions': [OpenAIToolAgentAction(tool='get_parsed_python_file', tool_input={'path': '../docs/sample.py'}, log="\nInvoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_7FpvgcLY3hxSpnTcqLtJ0QqU', 'function': {'arguments': '{\n  "path": "../docs/sample.py"\n}', 'name': 'get_parsed_python_file'}, 'type': 'function'}]})], tool_call_id='call_7FpvgcLY3hxSpnTcqLtJ0QqU')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_7FpvgcLY3hxSpnTcqLtJ0QqU', 'function': {'arguments': '{\n  "path": "../docs/sample.py"\n}', 'name': 'get_parsed_python_file'}, 'type': 'function'}]})]},
 {'steps': [AgentStep(action=OpenAIToolAgentAction(tool='get_parsed_python_file', tool_input={'path': '../docs/sample.py'}, log="\nInvoking: `get_parsed_python_file` with `{'path': '../docs/sample.py'}`\n\n\n", message_log=[AIMessageChunk(content='', 

# Understand Python using LLama2

In [33]:
from langchain.llms import CTransformers

llm = CTransformers(model='../models/llama-2-7b-chat.ggmlv3.q8_0.bin', model_type='llama', config={'max_new_tokens': 256, 'temperature': 0.01})

In [39]:
import re
from langchain.prompts import StringPromptTemplate, PromptTemplate
from typing import List

template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Answer the following questions as best you can. You have access to the following tools:

Python Parser Tool: A tool to load python files from the local source code. The input is the path to the python file

Strictly use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Python Parser Tool]
Action Input: the input to the action, should be the file needed to generate documentation for.
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

For examples:
Question: Write me a documentation of the python file ../test.py
Thought: First, I need to read the python file ../test.py
Action: Python Parser Tool
Action Input: ../test.py
Observation: def addNumbers(a, b):
                 sum = a + b
                 return sum
Thought: I should generate documentation of this code.
Action: Generate the documentation of the code
Action Input: def addNumbers(a, b):
                 sum = a + b
                 return sum
Observation: This file contains two functions:

            ### addNumbers(a, b)

            This function takes two parameters `a` and `b` and returns their sum.

            #### Parameters

            - `a` (int): The first number.
Thought: I now know the final answer.
Final Answer: This file contains two functions:

            ### addNumbers(a, b)

            This function takes two parameters `a` and `b` and returns their sum.

            #### Parameters

            - `a` (int): The first number.

### Input:
{input}

### Response:
{agent_scratchpad}"""

temp_Ins = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Question: {thought}
Query: {query}
Observation: {observation}

### Input:
Make a short summary of useful information from the result observation that is related to the question.

### Response:"""

prompt_Ins = PromptTemplate(
    input_variables=["thought", "query", "observation"],
    template=temp_Ins,
)


class CustomPromptTemplate(StringPromptTemplate):
    """Schema to represent a prompt for an LLM.

    Example:
        .. code-block:: python

            from langchain import PromptTemplate
            prompt = PromptTemplate(input_variables=["foo"], template="Say {foo}")
    """

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""

    template: str
    """The prompt template."""

    template_format: str = "f-string"
    """The format of the prompt template. Options are: 'f-string', 'jinja2'."""

    validate_template: bool = False
    """Whether or not to try validating the template."""
    

    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        if len(intermediate_steps) > 0:
            regex = r"Thought\s*\d*\s*:(.*?)\nAction\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)\nObservation"
            text_match = intermediate_steps[-1][0].log
            if len(intermediate_steps) > 1:
                text_match = 'Thought: ' + text_match
            match = re.search(regex, text_match, re.DOTALL)            
            my_list = list(intermediate_steps[-1])
            p_INS_temp = prompt_Ins.format(thought=match.group(1).strip(), query=match.group(3).strip(), observation=my_list[1])
            my_list[1] = llm(p_INS_temp)
            my_tuple = tuple(my_list)            
            intermediate_steps[-1] = my_tuple
            
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f" {observation}\nThought:"
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        return self.template.format(**kwargs)
    
prompt = CustomPromptTemplate(input_variables=["input", "intermediate_steps"],
                              template=template,validate_template=False)

In [40]:
from langchain.agents import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish
from typing import Union, Any

class CustomOutputParser(AgentOutputParser):
    
    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
output_parser = CustomOutputParser()

In [41]:
from langchain import LLMChain
from langchain.agents import LLMSingleActionAgent

llm_chain = LLMChain(llm=llm, prompt=prompt)
agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:", "\nObservations:"], 
    allowed_tools=tools
)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [42]:
agent_executor.run("Write a docs for the file ../docs/sample.py")



[1m> Entering new AgentExecutor chain...[0m


Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).
Number of tokens (515) exceeded maximum context length (512).
Number of tokens (516) exceeded maximum context length (512).
Number of tokens (517) exceeded maximum context length (512).
Number of tokens (518) exceeded maximum context length (512).
Number of tokens (519) exceeded maximum context length (512).
Number of tokens (520) exceeded maximum context length (512).
Number of tokens (521) exceeded maximum context length (512).
Number of tokens (522) exceeded maximum context length (512).
Number of tokens (523) exceeded maximum context length (512).
Number of tokens (524) exceeded maximum context length (512).
Number of tokens (525) exceeded maximum context length (512).
Number of tokens (526) exceeded maximum context length (512).
Number of tokens (527) exceeded maximum context length (512).
Number of tokens (528) exceeded maximum context length (512).
Number o

ValueError: Could not parse LLM output: `Question: Write me a documentation of the python file ../docs/sample.py
Thought: First, I need to read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the file path of read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the file to read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the file to read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the read the`