### BreachSeek Redo

<img src="https://arxiv.org/html/2409.03789v1/extracted/5823759/Graph.png" width=500px />

In [1]:
import warnings

# Suppress every warning for the rest of the session; the filter has no
# category or module restriction, so it applies to all imported libraries too.
warnings.filterwarnings('ignore')

In [2]:
# utils/model.py
from langchain_openai import ChatOpenAI
# from langchain_anthropic import ChatAnthropic

def _get_model(model_name: str = "openai"):
    """Build the chat model selected by ``model_name``.

    Only ``"openai"`` is handled; it yields a ``ChatOpenAI`` client for
    ``gpt-4`` with ``temperature=0``.

    Raises:
        ValueError: for any other ``model_name``.
    """
    # Anthropic support (ChatAnthropic, claude-3-5-sonnet-20240620) is
    # currently disabled; its import above is commented out as well.
    if model_name != "openai":
        raise ValueError(f"Unsupported model type: {model_name}")

    return ChatOpenAI(temperature=0, model_name="gpt-4")

In [3]:
# utils/tools.py
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import ShellTool
from langgraph.prebuilt import ToolNode

# Tools offered to the agents: DuckDuckGo web search and a local shell.
# NOTE(review): `tools_node` invokes whichever of these the model calls, so
# shell commands chosen by the model run on this machine. Use an isolated
# environment.
tools = [DuckDuckGoSearchRun(), ShellTool()]
# Prebuilt LangGraph node wrapping the same tool list.
tool_node = ToolNode(tools)

In [4]:
# utils/state.py
from langgraph.graph import add_messages
from langchain_core.messages import AnyMessage, BaseMessage
from typing_extensions import TypedDict, Annotated, Sequence, List,  Literal, Optional, Dict
import operator

class AgentState(TypedDict):
    """State schema that holds only a conversation history."""
    # `add_messages` is attached as the reducer for this field.
    messages: Annotated[Sequence[BaseMessage], add_messages]


class RecoderOptions(TypedDict):
    """Options and outputs shared by the recorder functions."""
    # Text of the final report; written by `recorder_final`.
    report: str
    # Read by `recorder` to decide whether to skip the per-run summary.
    generate_final_report: bool
    # Paths of summary files; appended by `recorder_summary`, read by `recorder_final`.
    file_names: List[str]


class PentestState(TypedDict):
    """Shared state passed between the supervisor, pentester, tools, evaluator and recorder nodes."""
    # Conversation history; `operator.add` is the reducer, so node updates are concatenated.
    messages: Annotated[Sequence[AnyMessage], operator.add]
    # Routing hint written by the nodes and read by the `route_*` functions.
    current_step: Literal['pentester', 'evaluator', 'recorder', '__end__']
    # Merged into the history that `recorder_summary` formats into its prompt.
    pentest_results: dict
    # Task list produced by `supervisor`; read by `pentester` and `tools_node`.
    pentest_tasks: list
    # Single task chosen by `pentester` for the tool run.
    task: str 
    # Verdict text written by `evaluator` and inspected by `supervisor`.
    evaluation: str 
    # Not referenced by any node visible in this file.
    model_status: str
    # Findings reported by `evaluator`.
    findings: Dict[str, List[str]]
    # Command proposed by `pentester`.
    command: str
    # Program name chosen by `pentester`; interpolated into the `tools_node` prompts.
    tool_name: str
    # Result summary written by `tools_node`; read by `pentester` and `evaluator`.
    tool_results: str 
    # Recorder settings and outputs; may be None.
    recorder_options: Optional[RecoderOptions]


class GraphConfig(TypedDict):
    """Run-time configuration keys accepted by the graph."""
    # Backend name; `call_model` reads it from config['configurable']['model_name'].
    model_name: Literal["anthropic", "openai"]
    # Presumably the backend for the pentester agent (not read by any visible node).
    # The literal previously contained " anthropic" with a leading space, so
    # the value "anthropic" was not a valid member.
    pentester_model: Literal["ollama", "anthropic", "openai"]

In [5]:
# utils/nodes.py
from langgraph.prebuilt import ToolNode


def should_continue(state):
    """Choose the next edge label from the most recent message.

    Returns ``"continue"`` when the last message carries tool calls and
    ``"end"`` when it carries none.
    """
    latest = state["messages"][-1]
    return "continue" if latest.tool_calls else "end"


# Generic prompt for the tool-using agent; `call_model` prepends it to the
# conversation with the "system" role. The text is used verbatim at runtime.
system_prompt = """
Be a helpful agent, you can search the web and you can run shell commands.
If you receive the output from a tool
show the output of the commands in markdown for debugging purposes prefix your
response after the tool call with DEBUG: keyword.
"""


def call_model(state, config):
    """Run the configured chat model over the conversation.

    The module-level ``system_prompt`` is placed ahead of
    ``state["messages"]``. The backend name is read from
    ``config["configurable"]["model_name"]`` and defaults to ``"openai"``.

    Returns:
        A ``{"messages": [reply]}`` update; the single-element list is meant
        to be added to the existing history.
    """
    history = state["messages"]
    conversation = [{"role": "system", "content": system_prompt}] + history

    configurable = config.get('configurable', {})
    backend = configurable.get("model_name", "openai")

    reply = _get_model(backend).invoke(conversation)
    return {"messages": [reply]}

In [6]:
from langchain_core.messages import SystemMessage, HumanMessage

# supervisor.py
# System message describing the supervisor agent's role: hand tasks to the
# pentester, read the evaluator's verdict, and stop once everything is done.
supervisor_system_prompt = SystemMessage("""
You are the supervisor of AI agents responsible for overseeing their performance, 
you send the pentesting tasks to pentester agent, and you receive the response 
from an evaluator agent, if the evaluator says everything is done, then end the program,

Try to end as soon as possible. 
""")


class SupervisorTask(TypedDict):
    # Structured-output schema that `supervisor` requests from the model.
    # Free-text reasoning; `supervisor` forwards it as the assistant message.
    supervisor_thought: Annotated[str, ..., "supervisor thoughts that includes decision making based on evaluator prompt if he responded"]
    # Tasks for the pentester; `supervisor` stores them as `pentest_tasks`.
    tasks: list
    # Agent to run next; `supervisor` only acts on the value 'pentester'.
    next_agent: Literal['pentester', 'evaluator', 'recorder', '__end__']
    # When truthy, `supervisor` ends the run.
    done: bool



# Don't think we need this one
def _swap_messages(messages):
    """Return a new list of messages with the speaker roles flipped.

    ``assistant`` entries become ``user`` entries; every other role becomes
    ``assistant``. Only ``role`` and ``content`` are carried over.
    """
    return [
        {
            "role": "user" if entry['role'] == 'assistant' else "assistant",
            "content": entry['content'],
        }
        for entry in messages
    ]



def supervisor(state: PentestState) -> PentestState:
    """Decide what the workflow does next.

    The run ends when the evaluator's verdict carries the ``end program now``
    marker, when the model call fails, when the model reports ``done``, or
    when the model names any agent other than the pentester. Otherwise the
    model's task list is handed to the pentester.

    Args:
        state: Current pentest state; ``evaluation``, ``messages`` and
            ``pentest_tasks`` are read.

    Returns:
        A partial state update with ``messages`` and ``current_step``, plus
        ``pentest_tasks`` when the pentester should run.
    """
    # Marker that `evaluator` appends to its verdict when the run should stop.
    # Matching the whole marker avoids false positives on ordinary words that
    # merely contain "end" (e.g. "recommend", "endpoint").
    end_marker = 'end program now'

    def terminate(content='done, bye!'):
        # Single place that builds the "stop the graph" update.
        return {
            'messages': [{'role': 'assistant', 'content': content}],
            'current_step': '__end__',
        }

    # The evaluator concluded all tasks are done (or failed) -> termination.
    evaluation = state.get('evaluation') or ''
    if end_marker in evaluation:
        return terminate()

    if evaluation:
        # The pentester ran and was evaluated, but the work is not finished:
        # ask how to proceed based on the verdict alone.
        conversation = [supervisor_system_prompt, HumanMessage(
            content=f"based on {evaluation}, what should we do now to finish the tasks: {state.get('pentest_tasks', [])}"
        )]
    else:
        # First pass: use the supervisor's own system prompt rather than the
        # generic tool-agent prompt.
        conversation = [supervisor_system_prompt] + list(state.get('messages', []))

    model = _get_model().with_structured_output(SupervisorTask)

    try:
        response = model.invoke(conversation)
    except Exception as e:
        # Best effort: surface the failure in the transcript and stop.
        return terminate(f'Model error: {e}')

    # The supervisor concluded all tasks are done -> termination.
    if response.get('done'):
        return terminate()

    if response.get('next_agent') == 'pentester':
        return {
            'messages': [{'role': 'assistant',
                          'content': response.get('supervisor_thought', '')}],
            'current_step': 'pentester',
            'pentest_tasks': response.get('tasks', []),
        }

    # Fallback: any other agent choice ends the run.
    return terminate()

In [7]:
# recorder.py
class Report(TypedDict):
    # Structured-output schema that `recorder` binds to the model.
    # LaTeX text; the recorder functions write it to `file_name`.
    report: Annotated[str, "This is the summary written in Latex"]
    # Path the recorder functions open for writing.
    file_name: Annotated[str, "This is the filename of the Latex report"]



def recorder_summary(state: PentestState, model):
    """Summarise the latest run and save the summary as a LaTeX file.

    Args:
        state: Pentest state; ``messages`` and ``pentest_results`` are
            formatted into the prompt.
        model: Object whose ``invoke(prompt)`` returns a mapping with
            ``report`` and ``file_name`` keys (the ``Report`` schema).

    Returns:
        The same ``state`` object, with the new file name appended to
        ``state['recorder_options']['file_names']``.
    """
    prompt = '''You are tasked with recording and summarizing information of a chat between a human and an LLM in \
    which the LLM is utilizing the command line to execute tasks. You have as an input the history of the last command that ran \
    which inclueds the output logs and the message prompt for previous commands:
     
    <history>
    {history}
    </history>

    Generate a summary for this using latex.
    '''
    # Missing keys fall back to empty values instead of raising KeyError.
    history = {'user_prompts': state.get('messages', [])} | (state.get('pentest_results') or {})
    response = model.invoke(prompt.format(history=history))

    # Structured output for a TypedDict schema is a plain dict, so fields are
    # read by key; attribute access (`response.file_name`) raised AttributeError.
    file_name = response['file_name']

    # `recorder_options` is Optional in the state schema: create it on demand.
    options = state.get('recorder_options')
    if options is None:
        options = {'report': '', 'generate_final_report': False, 'file_names': []}
        state['recorder_options'] = options
    options.setdefault('file_names', []).append(file_name)

    with open(file_name, 'w') as f:
        f.write(response['report'])

    return state

def recorder_final(state: PentestState, model):
    """Combine all saved summaries into a final LaTeX report.

    Every file listed in ``state['recorder_options']['file_names']`` is read
    and the collected texts are formatted into the prompt.

    Args:
        state: Pentest state holding ``recorder_options``.
        model: Object whose ``invoke(prompt)`` returns a mapping with
            ``report`` and ``file_name`` keys (the ``Report`` schema).

    Returns:
        The same ``state`` object, with the report text stored in
        ``state['recorder_options']['report']``.

    Raises:
        OSError: if a listed summary file cannot be read or the report file
            cannot be written.
    """
    prompt = '''You are tasked with summarizing and reporting information of a chat between a human and an LLM in \
    whihc the LLM is utilizing the command line to execute tasks. You have as input the history of summaries of all previous \
    outputs and interactions between the human and the LLM:

    <history>
    {history}
    </history>

    You are tasked with generating a final report in a Latex format. Also return the file path as a relative path.
    '''

    # `recorder_options` is Optional in the state schema: create it on demand.
    options = state.get('recorder_options')
    if options is None:
        options = {'report': '', 'generate_final_report': False, 'file_names': []}
        state['recorder_options'] = options

    summaries = []
    for file_name in options.get('file_names', []):
        with open(file_name) as f:
            summaries.append(f.read())

    response = model.invoke(prompt.format(history=summaries))

    # Structured output for a TypedDict schema is a plain dict, so fields are
    # read by key; attribute access (`response.report`) raised AttributeError.
    report = response['report']
    options['report'] = report
    with open(response['file_name'], 'w') as f:
        f.write(report)

    return state


def recorder(state: PentestState):
    """Record the run: optionally write a summary, then write the final report.

    When ``recorder_options['generate_final_report']`` is falsy, a summary of
    the latest run is written first; the final report is produced in either
    case.

    Args:
        state: Current pentest state.

    Returns:
        The state returned by ``recorder_final``.
    """
    model = _get_model().with_structured_output(Report)

    # `recorder_options` is Optional in the state schema. `state.get(key, {})`
    # still returns None when the key is present with a None value, so the
    # options are normalised here before being read.
    recorder_options = state.get('recorder_options')
    if recorder_options is None:
        recorder_options = {'report': '', 'generate_final_report': False, 'file_names': []}
        state['recorder_options'] = recorder_options

    if not recorder_options.get('generate_final_report', False):
        # NOTE(review): the final report is generated after every summary as
        # well. Confirm this is intended rather than returning the summary
        # state on its own.
        state = recorder_summary(state, model)

    return recorder_final(state, model)

In [8]:
# pentester.py
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from typing_extensions import Annotated, TypedDict, List, Literal, Optional 


class PentesterResults(TypedDict):
    """The results that the pentester needs to work"""
    # Structured-output schema that `pentester` requests from the model.
    # `pentester` reads `tool_use`, `message`, `software`, `tasks` and
    # `command`; `phase` and `results` are not read by any visible node.
    message: Annotated[str, ..., "Pentester thought process"]
    phase: Annotated[str, ..., "Current phase of pentesting, such as scanning or exploitation"]
    tasks: Annotated[List[str], ..., "Tasks to perform to complete the current phase"]
    results: Annotated[List[str], ..., "Summary of results"]
    tool_use: Annotated[bool, ..., "Indicates if tools are still needed to complete a task"]
    software: Annotated[List[str], ..., "program names that will be used to perform the tasks"]
    command: Annotated[str, ..., "possible command that will be run"]


def _swap_messages(messages):
    """Flip speaker roles so each side sees the other's turns as its own input.

    An ``assistant`` message is re-labelled ``user``; a message with any
    other role is re-labelled ``assistant``. The result is a new list of
    dicts containing only ``role`` and ``content``.
    """
    flipped = []
    for original in messages:
        was_assistant = original['role'] == 'assistant'
        flipped.append({
            "role": "user" if was_assistant else "assistant",
            "content": original['content'],
        })
    return flipped



def pentester(state: PentestState) -> PentestState:
    """Plan the next pentest action, or hand over to the evaluator.

    If the state already holds tool results, the node goes straight to the
    evaluator. Otherwise the model is asked for a structured plan; when it
    reports that a tool is needed, the chosen program, task and command are
    returned and the step stays ``'pentester'``.

    Args:
        state: Current pentest state; ``pentest_tasks``, ``tool_results`` and
            ``messages`` are read.

    Returns:
        A partial state update containing at least ``current_step``.
    """
    def first_or_none(items):
        # Safe head of a possibly empty or missing list; plain `[0]`
        # indexing raised IndexError when the model returned [].
        return items[0] if items else None

    # Temporary shortcut for debugging
    if state.get('tool_results'):
        return {'current_step': 'evaluator'}

    tasks_content = state.get('pentest_tasks', [])
    tasks = f"""
    Do the following tasks:

    {tasks_content}
    """

    # The prompt is wrapped in an explicit system-role message, as
    # `call_model` does; a bare string in the list is treated as a user turn.
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': tasks},
    ] + _swap_messages(state.get('messages', []))

    model = _get_model().with_structured_output(PentesterResults)

    try:
        response = model.invoke(messages)
    except Exception as e:
        # Best effort: record the failure and let the evaluator decide.
        return {
            'messages': [{'role': 'assistant', 'content': f'Model error: {e}'}],
            'current_step': 'evaluator'
        }

    # In case that the tool needs to be used
    if response.get('tool_use'):
        return {
            'messages': [{'role': 'assistant',
                          'content': response.get('message', '')}],
            'current_step': 'pentester',
            'tool_name': first_or_none(response.get('software')),
            'task': first_or_none(response.get('tasks')),
            'command': response.get('command'),
        }

    return {'current_step': 'evaluator'}

In [9]:
# tools.py
class ToolUsage(TypedDict):
    """The task that the tool needs to do"""
    # Structured-output schema that `tools_node` uses for its closing summary.
    # Joined with `results` to form the assistant message.
    message: str
    # Stored in the state as `tool_results`.
    results: str


def tools_node(state: PentestState) -> PentestState:
    """Let the model drive the bound tools for one task and summarise the outcome.

    The model is prompted with the chosen tool name and task, every tool call
    it emits is executed, and a second structured-output call condenses the
    exchange into a ``ToolUsage`` result.

    Args:
        state: Current pentest state; ``tool_name``, ``pentest_tasks`` and
            ``task`` are read.

    Returns:
        A partial state update with the summary as an assistant message and
        ``tool_results`` set to the ``results`` field.

    Raises:
        KeyError: if the model calls a tool that is not in ``tools``.
    """
    # Named `tool_prompt` so it does not shadow the module-level `system_prompt`.
    tool_prompt = SystemMessage(f"""
    You are an AI agent who is an expert in using a tool named {state['tool_name']}, 
    to do {state['pentest_tasks']}. You are working with a team of AI agents, 
    you will receive a task, and possibly hints, constrains, and args. 
    You will have to use your expertise in {state['tool_name']} to finish
    the task. When you are done pass the results to the calling agent. 

    you have access to bash shell using shell_tool
    """)
    human_msg = HumanMessage(content=f"Use {state['tool_name']} to do {state['task']}")
    messages = [tool_prompt, human_msg]

    ai_msg = _get_model().bind_tools(tools).invoke(messages)
    messages.append(ai_msg)

    # Build the name -> tool lookup once rather than once per tool call.
    tools_by_name = {tool.name: tool for tool in tools}
    for tool_call in ai_msg.tool_calls:
        # SECURITY: the selected tool (including the shell tool) runs with
        # arguments chosen by the model; run only in an isolated environment.
        selected_tool = tools_by_name[tool_call["name"].lower()]
        messages.append(selected_tool.invoke(tool_call))

    messages.append(HumanMessage(content='summarize before parsing'))
    response = _get_model().with_structured_output(ToolUsage).invoke(messages)

    return {
        # str() keeps the join from failing if a field comes back non-string.
        'messages': [{'role': 'assistant',
                      'content': '\n\n'.join(str(value) for value in response.values())}],
        'tool_results': response['results'],
    }

In [10]:
class Eval(TypedDict):
    """Task evaluation"""
    # Structured-output schema that `evaluator` requests from the model.
    # Forwarded by `evaluator` as the assistant message.
    message: Annotated[str, ..., "The message to respond to the user"]
    # Verdict text; `evaluator` stores it in the state as `evaluation`.
    evaluation: Annotated[str, ..., "Evaluate the pentester response, see if he finished the task"]
    # Either flag makes `evaluator` append its end marker and route to the supervisor.
    done: Annotated[bool, ..., "True if the pentester finished the task"]
    fail: Annotated[bool, ..., "True if the pentester can't finish the task, this will end the program"]
    # Findings grouped by severity; copied into the state unchanged.
    findings: Annotated[Dict[Literal['critical', 'high', 'medium', 'low'], List[str]], ..., "denotes findings worth of mention"]
    # The commented-out fields below are not part of the active schema.
    # message: str
    # phase: Annotated[str, ... , "Current phase of pentesting"]
    # task: Annotated[str, ... , "Task to perform so that the tool agent succeeds."]
    # hints: Annotated[Optional[List[str]], ... , "May contain hints about the task"]
    # constraints: Annotated[Optional[List[str]], ... , "Contains constraints that the tool user agent should be aware of"]
    # program_name: Annotated[str, ... , "Program name that the tool uses to finish a task"]
    # args: Annotated[Optional[Dict], ... , "Args that might help the tool user agent"]
    # results: Annotated[Dict, ... , "Title of findings as keys, and a description as their value"]
    # results: str

def _swap_messages(messages):
    """Build a role-inverted copy of a list of ``{'role', 'content'}`` dicts.

    ``assistant`` maps to ``user``; any other role maps to ``assistant``.
    Keys other than ``role`` and ``content`` are dropped.
    """
    def invert(message):
        role = message['role']
        return {
            "role": "user" if role == 'assistant' else "assistant",
            "content": message['content'],
        }

    return list(map(invert, messages))


def evaluator(state: PentestState) -> PentestState:
    """Ask the model to judge the pentester's tool results.

    Args:
        state: Current pentest state; only ``tool_results`` is read.

    Returns:
        A partial state update with the model's message, the verdict text
        (suffixed with `` end program now`` when the model reports ``done``
        or ``fail``), the next step and the findings. If the model call
        raises, a fixed failure verdict is returned and the step is set to
        ``'supervisor'``.
    """
    system_message = SystemMessage("""
    You are evaluating the response from a pentester agent, 
    if the pentester brings back some results, give him a pass,
    if he did very bad, and you think he can do better, point 
    out his mistake and make him redo it again, if the pentester
    can't do the task. End the program, and let the supervisor know. 
    """)

    judge = _get_model().with_structured_output(Eval)

    results_so_far = state.get('tool_results', [])
    request = HumanMessage(content=f"evaluate pentester results {results_so_far}")

    try:
        verdict = judge.invoke([system_message, request])
    except Exception as e:
        # Report the failure and hand control back to the supervisor.
        return {
            'messages': [{'role': 'assistant', 'content': f'Model error during evaluation: {e}'}],
            'evaluation': 'evaluation failed due to error',
            'current_step': 'supervisor',
            'findings': {},
        }

    is_done = verdict.get('done', False)
    has_failed = verdict.get('fail', False)
    evaluation = verdict.get('evaluation', '')

    # Either outcome finishes this task: mark the verdict and go to the supervisor.
    if is_done or has_failed:
        evaluation += ' end program now'
        next_step = 'supervisor'
    else:
        next_step = 'pentester'

    return {
        'messages': [{'role': 'assistant', 'content': verdict.get('message', '')}],
        'evaluation': evaluation,
        'current_step': next_step,
        'findings': verdict.get('findings', {}),
    }


In [11]:
from langgraph.graph import StateGraph, END

def recorder(state: PentestState) -> PentestState:
    """Placeholder recorder node that returns a canned report.

    NOTE(review): an earlier cell also defines ``recorder``; this later
    definition is the one registered in the graph below.
    NOTE(review): ``report`` is not a field of ``PentestState``; confirm
    how the graph treats it.

    Returns:
        A fixed update with a hard-coded ``report`` string and
        ``current_step`` set to ``'__end__'``.
    """
    # Logic for recorder node
    # '__end__' is the terminal value declared for `current_step`; the former
    # 'end' was outside that Literal set and named no node.
    return {'report': 'Pentest Report: Critical SQL Injection vulnerability found', 'current_step': '__end__'}

def route_supervisor(state: PentestState) -> Literal['pentester', 'evaluator', 'recorder', '__end__']:
    """Return the node name stored in ``state['current_step']``."""
    next_node = state['current_step']
    return next_node

def route_pentester(state: PentestState) -> Literal['tools_node', 'evaluator']:
    """Route to the tools node while the step is ``'pentester'``, else to the evaluator."""
    if state['current_step'] == 'pentester':
        return 'tools_node'
    return 'evaluator'


# Create the graph
# The config schema is the GraphConfig TypedDict; the graph class itself was
# previously passed here by mistake.
workflow = StateGraph(PentestState, config_schema=GraphConfig)

# Add nodes
workflow.add_node('supervisor', supervisor)
workflow.add_node('pentester', pentester)
workflow.add_node('tools_node', tools_node)
workflow.add_node('evaluator', evaluator)
workflow.add_node('recorder', recorder)

# Add edges
workflow.set_entry_point('supervisor')
# The supervisor and pentester pick their successor from `current_step`.
workflow.add_conditional_edges('supervisor', route_supervisor)
workflow.add_conditional_edges('pentester', route_pentester)
# Tool output always returns to the pentester; evaluator and recorder always
# report back to the supervisor.
workflow.add_edge('tools_node', 'pentester')
workflow.add_edge('evaluator', 'supervisor')
workflow.add_edge('recorder', 'supervisor')

# Compile the graph
pentest_graph = workflow.compile()

In [12]:
# query = "Ping to 8.8.8.8 4 times, and check if the host is up."
# query = "Visit https://dreamhack.io and tell me what this site is about."
from pprint import pprint

query = "Do fast port scanning on 10.10.11.18 to discover which tcp ports are open."

# Seed the graph with the request as the only message.
initial_state = {"messages": [{"role": "user", "content": query}]}

# Each streamed chunk is indexed like a state dict; print its newest message.
for chunk in pentest_graph.stream(initial_state, stream_mode='values'):
    latest_message = chunk['messages'][-1]
    pprint(latest_message)

{'content': 'Do fast port scanning on 10.10.11.18 to discover which tcp ports '
            'are open.',
 'role': 'user'}
{'content': 'The pentester should use a tool like nmap to perform a fast port '
            'scan on the target IP address.',
 'role': 'assistant'}
{'content': "I'm going to perform a fast port scan on the target IP address "
            '10.10.11.18 using nmap.',
 'role': 'assistant'}
Executing command:
 ['nmap -F 10.10.11.18']
{'content': 'The nmap scan on the IP address 10.10.11.18 has been completed. '
            'The scan was a fast port scan (-F option).\n'
            '\n'
            'The scan found that the host is up with a latency of 0.25s. Out '
            'of the scanned ports, 98 were closed. However, two ports were '
            'found to be open: Port 22 (ssh) and Port 80 (http).',
 'role': 'assistant'}
{'content': 'The nmap scan on the IP address 10.10.11.18 has been completed. '
            'The scan was a fast port scan (-F option).\n'
         

In [None]:
from pprint import pprint

query = "Find vHost of the current domain usage.htb of which ip is 10.10.11.18."

# The user's request is the sole entry in the starting conversation.
user_turn = {"role": "user", "content": query}
initial_state = {"messages": [user_turn]}

# Print the last message of every chunk the graph streams back.
for chunk in pentest_graph.stream(initial_state, stream_mode='values'):
    pprint(chunk['messages'][-1])