# CloudAEye Simple Agent + Tool Log Analyzer

In [2]:
%pip install -q langchain_community langchain_core langchain_groq langgraph
%pip install -q google-api-python-client

import os

from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool

from typing import TypedDict, Annotated, List
from langgraph.graph import StateGraph, END

from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_groq import ChatGroq

chat = ChatGroq(
    model="llama3-70b-8192",
    groq_api_key = ''
)

search = GoogleSearchAPIWrapper(google_api_key='',
                                google_cse_id="",
                                k=1)

tool = Tool(
    name="google_search",
    description="Search Google for recent results.",
    func=search.run,
)


  warn_deprecated(


## Creating function to isolate logs at User-Provided TimeStamp

In [3]:
## Example logs:

log_data = """
 01
03/22 08:51:01 INFO   :.main: *************** RSVP Agent started ***************
 02
03/22 08:51:01 INFO   :...locate_configFile: Specified configuration file: /u/user10/rsvpd1.conf
03/22 08:51:01 INFO   :.main: Using log level 511
03/22 08:51:01 INFO   :..settcpimage: Get TCP images rc - EDC8112I Operation not supported on socket.
 03
03/22 08:51:01 INFO   :..settcpimage: Associate with TCP/IP image name = TCPCS
03/22 08:51:02 INFO   :..reg_process: registering process with the system
03/22 08:51:02 INFO   :..reg_process: attempt OS/390 registration
03/22 08:51:02 INFO   :..reg_process: return from registration rc=0
 04
03/22 08:51:06 TRACE  :...read_physical_netif: Home list entries returned = 7
03/22 08:51:06 INFO   :...read_physical_netif: index #0, interface VLINK1 has address 129.1.1.1, ifidx 0
03/22 08:51:06 INFO   :...read_physical_netif: index #1, interface TR1 has address 9.37.65.139, ifidx 1
03/22 08:51:06 INFO   :...read_physical_netif: index #2, interface LINK11 has address 9.67.100.1, ifidx 2
03/22 08:51:06 INFO   :...read_physical_netif: index #3, interface LINK12 has address 9.67.101.1, ifidx 3
03/22 08:51:06 INFO   :...read_physical_netif: index #4, interface CTCD0 has address 9.67.116.98, ifidx 4
03/22 08:51:06 INFO   :...read_physical_netif: index #5, interface CTCD2 has address 9.67.117.98, ifidx 5
03/22 08:51:06 INFO   :...read_physical_netif: index #6, interface LOOPBACK has address 127.0.0.1, ifidx 0
03/22 08:51:06 INFO   :....mailslot_create: creating mailslot for timer
03/22 08:51:06 INFO   :...mailbox_register: mailbox allocated for timer
 05
03/22 08:51:06 INFO   :.....mailslot_create: creating mailslot for RSVP
03/22 08:51:06 INFO   :....mailbox_register: mailbox allocated for rsvp
03/22 08:51:06 INFO   :.....mailslot_create: creating mailslot for RSVP via UDP
 06
03/22 08:51:06 WARNING:.....mailslot_create: setsockopt(MCAST_ADD) failed - EDC8116I Address not available.
03/22 08:51:06 INFO   :....mailbox_register: mailbox allocated for rsvp-udp
03/22 08:51:06 TRACE  :..entity_initialize: interface 129.1.1.1, entity for rsvp allocated and initialized
03/22 08:51:06 INFO   :.....mailslot_create: creating mailslot for RSVP
03/22 08:51:06 INFO   :....mailbox_register: mailbox allocated for rsvp
03/22 08:51:06 INFO   :.....mailslot_create: creating mailslot for RSVP via UDP
03/22 08:51:06 WARNING:.....mailslot_create: setsockopt(MCAST_ADD) failed - EDC8116I Address not available.
03/22 08:51:06 INFO   :....mailbox_register: mailbox allocated for rsvp-udp
03/22 08:51:06 TRACE  :..entity_initialize: interface 9.37.65.139, entity for rsvp allocated and
initialized
"""

def get_nearby_logs(logs, timestamp, num_lines=2):
    list_logs = logs.strip().split("\n")
    res = []
    i = 0
    while i < len(list_logs):
        if list_logs[i].startswith(timestamp):
            start_index = max(0, i - num_lines)
            end_index = min(len(list_logs) - 1, i + num_lines + 1)
            for string in list_logs[start_index:end_index]:
                res.append(string)

            i += (2 * num_lines + 1)
        else:
            i += 1

    return res

timestamp = "03/22 08:51:06"

nearby_logs = get_nearby_logs(log_data, timestamp)

to_str = "\n".join(nearby_logs)
nearby_logs

['03/22 08:51:02 INFO   :..reg_process: return from registration rc=0',
 ' 04',
 '03/22 08:51:06 TRACE  :...read_physical_netif: Home list entries returned = 7',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #0, interface VLINK1 has address 129.1.1.1, ifidx 0',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #1, interface TR1 has address 9.37.65.139, ifidx 1',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #2, interface LINK11 has address 9.67.100.1, ifidx 2',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #3, interface LINK12 has address 9.67.101.1, ifidx 3',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #4, interface CTCD0 has address 9.67.116.98, ifidx 4',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #5, interface CTCD2 has address 9.67.117.98, ifidx 5',
 '03/22 08:51:06 INFO   :...read_physical_netif: index #6, interface LOOPBACK has address 127.0.0.1, ifidx 0',
 '03/22 08:51:06 INFO   :...mailbox_register: mailbox allocated for ti

## Categorizing Logs

### After selecting only nearby logs, we use a llama3 model to filter anamolous logs

In [4]:
# Categorize logs

prompt = PromptTemplate(
    template="""system
You are a Log Categorizer Agent. You excel at interpreting log messages. Your task is to categorize the given log into one of two categories: \\

- 'anamolous_log': Use this category for any log that indicates a warning or error.\
- 'standard_log': Use this category for logs that provide information about an ongoing process or indicate a process starting or completing.\\

Output a single category only from the types ('anamolous_log', 'standard_log').\
Example output:
'anamolous_log'\

LOG CONTENT:\n\n{initial_log}\n\n

assistant
""",
    input_variables=["initial_log"],
)


log_category_generator = prompt | chat | StrOutputParser()

try:
    final_logs = []
    for log in nearby_logs:
        log_category = str(log_category_generator.invoke({"initial_log": log}))
        if log_category == "'anamolous_log'":
            final_logs.append(log)
            print(log)
except Exception as e:
    print(e)
    pass





## Generating Search Tool Keywords

In [5]:
# Search keywords
search_keyword_prompt = PromptTemplate(
    template="""system
You are an expert at identifying the best keywords for a web search to find information about a log.

Given the INITIAL_LOG, determine the most effective keywords that will help in explaining and providing potential fixes for the warning or error.

Ignore specific memory addresses, IP addresses, and other details that are not useful for a general search.

Return a JSON object with a single key 'keywords' containing no more than 5 keywords. Provide no preamble or explanation.

user
INITIAL_LOG: {initial_log} \n
""",
    input_variables=["initial_log"],
)

search_keyword_chain = search_keyword_prompt | chat | JsonOutputParser()

for log in final_logs:
    search_keywords = search_keyword_chain.invoke({"initial_log": log})
    print(search_keywords)

{'keywords': ['mailslot_create', 'setsockopt', 'MCAST_ADD', 'EDC8116I', 'Address not available']}
{'keywords': ['setsockopt', 'MCAST_ADD', 'EADDRNOTAVAI', 'multicast', 'socket']}


## Creating the First Draft
### Using information from the google tool, this agent creates the initial draft explaining each anamalous log. It will also use previous explanations to help generate later explanations

In [6]:
explanation_writer_prompt = PromptTemplate(
    template="""system
    You are the solution WRITER Agent which takes the anomalous INITIAL_LOG below and the NEARBY_LOGS surrounding the anomalous log.\
    Write an explanation for the anomalous log and possible solutions to the problem within the context of the NEARBY_LOGS.\

    You must take into account the subsequent outputs in the NEARBY_LOGS to determine whether the potential issue has been automatically resolved.
    If it has, you can simply let the user know there is likely no need for them to alter their code and do not go on to provide a solution.\

    If a previous explanation is highly related, you must, before saying anything else, acknowledge that the issue is similar to a previous one and keep the explanation and solution extremely brief by paraphrasing the previous explanation.\

    If the provided RESEARCH_INFO is relevant to the log, you should use it to help you write the explanation and solution.\

    You never make up information that hasn't been provided by the INITIAL_LOG, RESEARCH_INFO, PREVIOUS EXPLANATIONS, or in the NEARBY_LOGS.

    Return the string as a JSON with a single key 'log_explanation'.

    user
    INITIAL_LOG: {initial_log} \n
    NEARBY_LOGS: {nearby_logs} \n
    PREVIOUS_EXPLANATIONS: {prev_explanations} \n
    RESEARCH_INFO: {research_info} \n
    assistant""",
    input_variables=["initial_log", 'nearby_logs', 'prev_explanations', "research_info"],
)

draft_writer_chain = explanation_writer_prompt | chat | JsonOutputParser()

try:
    prev_explanations = ""
    for log in final_logs:
        curr_explanation = draft_writer_chain.invoke({"initial_log": log, "nearby_logs":to_str,"research_info":None, "prev_explanations": prev_explanations})
        print(curr_explanation)
        prev_explanations += curr_explanation["log_explanation"] + "\n"
except Exception as e:
    print(e)
    pass

{'log_explanation': 'This issue is similar to a previous one. The anomalous log indicates that the `setsockopt(MCAST_ADD)` function failed with error `EDC8116I Address not available`. This suggests that the system is trying to add a multicast address to a socket, but the address is not available. Since there are no subsequent error messages or indications of a critical failure, it is likely that the system has automatically recovered from this issue. Therefore, there may be no need for the user to alter their code.'}


## Critiquing Agent
### This agent reads the outputs drafted and decides whether to send them to agent that rewrites the explanantion

In [7]:
rewrite_router_prompt = PromptTemplate(
    template="""system
You are an expert at evaluating the quality of explanations/solutions for software logs intended to help developers improve their code. Your task is to determine whether the DRAFT_EXPLANATION needs to be rewritten.

Use the following criteria to make your decision:

1. If the INITIAL_LOG only requires a simple response which the DRAFT_EXPLANATION contains, then it doesn't need to be rewritten.
2. If the DRAFT_EXPLANATION addresses all the concerns of the INITIAL_LOG, then it doesn't need to be rewritten.
3. If the DRAFT_EXPLANATION is missing information that the INITIAL_LOG requires, then it needs to be rewritten.

Ensure that the explanation is clear and concise. It need not be too wordy.

Give a binary choice: 'rewrite' (for needs to be rewritten) or 'no_rewrite' (for doesn't need to be rewritten) based on the DRAFT_EXPLANATION and the criteria.

Return a JSON object with a single key 'router_decision' and no preamble or explanation.

user
INITIAL_LOG: {initial_log} \n
DRAFT_EXPLANATION: {draft_explanation} \n
assistant
""",
    input_variables=["initial_log", "draft_explanation"],
)

rewrite_router = rewrite_router_prompt | chat | JsonOutputParser()

try:
    draft_explanation = "This log is bananas lmao."

    print(rewrite_router.invoke({"initial_log": final_logs[0], "draft_explanation":draft_explanation, "nearby_logs":to_str}))
except Exception as e:
    print(e)
    pass

{'router_decision': 'rewrite'}


## Draft Analysis Agent

### Agent that suggests pointers to improve current draft's explanation of log

In [8]:
draft_analysis_prompt = PromptTemplate(
    template="""system
You are the Quality Control Agent tasked with analyzing the INITIAL_LOG from a software's log dump, the DRAFT_EXPLANATION from the solution writer agent, and the RESEARCH_INFO from the research agent.

Your goal is to assess how well the DRAFT_EXPLANATION addresses the issues in the INITIAL_LOG and whether it provides an effective solution.

Specifically, check if the DRAFT_EXPLANATION:
1. Addresses the potential issues identified in the log.
2. Provides a clear and effective solution.

Give feedback on how the explanation can be improved, specifying what should be added or changed to make it more effective at addressing the software's issues.

Do not make up or add any information that hasn't been provided in the RESEARCH_INFO or INITIAL_LOG.

Return the analysis as a JSON object with a single key 'draft_analysis' and no preamble or explanation.

user
INITIAL_LOG: {initial_log} \n\n
RESEARCH_INFO: {research_info} \n\n
DRAFT_EXPLANATION: {draft_explanation} \n\n
PREVIOUS_EXPLANATIONS: {prev_explanations} \n\n
assisstant
""",
    input_variables=["initial_log", "research_info", "draft_explanation", "num_steps", "prev_explanations"],
)

draft_analysis_chain = draft_analysis_prompt | chat | JsonOutputParser()

try:
    draft_explanation = "Yo we can't help you with this log."

    explanation_analysis = draft_analysis_chain.invoke({"initial_log": final_logs[0],
                                    "research_info":"",
                                    "draft_explanation": draft_explanation, })

    print(explanation_analysis)
except Exception as e:
    print(e)
    pass

"Input to PromptTemplate is missing variables {'prev_explanations'}.  Expected: ['draft_explanation', 'initial_log', 'prev_explanations', 'research_info'] Received: ['initial_log', 'research_info', 'draft_explanation']"


## Rewriting Agent

### Final node of the graph. Takes a draft and rewrites it to address issues raised by draft_analysis agent

In [9]:
rewrite_explanation_prompt = PromptTemplate(
    template="""system
You are the Final Explanation Agent. Your task is to read the log analysis from the QC Agent and use it to rewrite and improve the draft_explanation, creating a final explanation.

Do not make up or add any information that hasn't been provided by the research_info, nearby_logs, or initial_log.

Consider the outputs in the NEARBY_LOGS to determine whether the potential issue has been automatically resolved.

Return the final explanation as a JSON object with a single key 'final_explanation' which is a string, and no preamble or explanation.

user
RESEARCH_INFO: {research_info} \n\n
DRAFT_EXPLANATION: {draft_explanation} \n\n
DRAFT_EXPLANATION_FEEDBACK: {explanation_analysis} \n\n
NEARBY_LOGS: {nearby_logs} \n\n
assisstant
""",
    input_variables=["initial_log", "research_info", "explanation_analysis", "draft_explanation", "nearby_logs"],
)


rewrite_chain = rewrite_explanation_prompt | chat | JsonOutputParser()


try:
    research_info = None
    draft_explanation = "Yo we can't help you with this log."

    final_explanation = rewrite_chain.invoke({"initial_log": final_logs[0],
                                    "research_info":research_info,
                                    "draft_explanation": draft_explanation,
                                    "nearby_logs":to_str,
                                    "explanation_analysis": explanation_analysis["draft_analysis"]})

    print(final_explanation)
except Exception as e:
    print(e)
    pass

name 'explanation_analysis' is not defined


## Defining Graph State and Nodes

In [10]:
class GraphState_pt1(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        initial_log: log output
        explanation: LLM generation
        research_info: list of documents
        nearby_logs: str of logs
        num_steps: number of steps
    """
    initial_log : str
    explanation : str
    prev_explanations : str
    nearby_logs : str
    research_info : List[str]
    num_steps : int

class GraphSate_pt2(TypedDict):
    initial_log : str
    draft_explanation: str
    nearby_logs: str
    prev_explanations: str
    research_info : List[str]
    draft_explanation_feedback: str
    final_explanation: str
    num_steps: int


pt1 = StateGraph(GraphState_pt1)
pt2 = StateGraph(GraphSate_pt2)

In [11]:
def categorize_log(state):
    """Decide if log is standard or anamolous"""
    print("---CATEGORIZING INITIAL LOG---")
    initial_log = state['initial_log']
    num_steps = int(state['num_steps'])
    num_steps += 1

    log_category = log_category_generator.invoke({"initial_log": initial_log})
    if log_category != "'anamolous_log'":
      print("No problems here.")

    return {"log_category": log_category, "num_steps":num_steps}

pt1.add_node("categorize_log", categorize_log)

In [12]:
def research_info_search(state):
    print("---SEARCHING FOR RESEARCH INFO---")
    initial_log = state['initial_log']
    num_steps = int(state['num_steps'])
    num_steps += 1
    full_searches = []
    search_keywords = search_keyword_chain.invoke({"initial_log": initial_log})
    for keyword in search_keywords["keywords"]:
        temp_docs = tool.invoke({"query": keyword})

        if full_searches:
            full_searches.append(temp_docs)
        else:
            full_searches = [temp_docs]
    print(full_searches)
    return {"research_info": full_searches, "num_steps":num_steps}

pt1.add_node("research_info_search", research_info_search)

In [13]:
def draft_explanation_writer(state):
    print("---WRITING DRAFT EXPLANATION---")
    initial_log = state['initial_log']
    nearby_logs = state['nearby_logs']
    prev_explanations = state['prev_explanations']
    research_info = state['research_info']
    num_steps = int(state['num_steps'])
    num_steps += 1

    draft_explanation = draft_writer_chain.invoke({"initial_log": initial_log, "nearby_logs": nearby_logs,"research_info": research_info, "prev_explanations": prev_explanations})

    draft_explanation = draft_explanation['log_explanation']

    return {"explanation": draft_explanation}

pt1.add_node("draft_explanation_writer", draft_explanation_writer)

In [14]:
def analyze_draft_explanation(state):
    print("---ANALYZING DRAFT EXPLANATION---")
    initial_log = state['initial_log']
    draft_explanation = state['draft_explanation']
    prev_explanations = state['prev_explanations']
    research_info = state['research_info']
    num_steps = int(state['num_steps'])
    num_steps += 1

    explanation_analysis = draft_analysis_chain.invoke({"initial_log": initial_log,
                                 "research_info":research_info,
                                 "draft_explanation": draft_explanation, "prev_explanations": prev_explanations})

    explanation_analysis = explanation_analysis['draft_analysis']

    return {"draft_explanation_feedback": explanation_analysis, "num_steps":num_steps}

pt2.add_node("analyze_draft_explanation", analyze_draft_explanation)

In [15]:
def rewrite_explanation(state):
    print("---REWRITING EXPLANATION---")
    initial_log = state['initial_log']
    draft_explanation = state['draft_explanation']
    explanation_analysis = state['draft_explanation_feedback']
    research_info = state['research_info']
    nearby_logs = state['nearby_logs']
    num_steps = int(state['num_steps'])
    num_steps += 1

    final_explanation = rewrite_chain.invoke({"initial_log": initial_log,
                                 "research_info":research_info,
                                 "draft_explanation": draft_explanation,
                                 "explanation_analysis": explanation_analysis, "nearby_logs": nearby_logs})

    final_explanation = final_explanation['final_explanation']

    return {"final_explanation": final_explanation, "num_steps":num_steps}

pt2.add_node("rewrite_explanation", rewrite_explanation)

In [16]:
# def no_rewrite(state):
#     print("---NO REWRITE NEEDED---")
#     draft_explanation = state['draft_explanation']
#     num_steps = int(state['num_steps'])
#     num_steps += 1

#     prev_explanations = state['prev_explanations'] + draft_explanation + "\n"

#     print(f"Final Explanation: {draft_explanation}")

#     return {"final_explanation": draft_explanation, "prev_explanations": prev_explanations, "num_steps":num_steps}


# pt2.add_node("no_rewrite", no_rewrite)

In [17]:
def state_printer(state):
    print(f"Initial Log: {state['initial_log']}")
    print(f"Draft Explanation: {state['draft_explanation']}")
    print(f"Final Explanation: {state['final_explanation']}")
    print(f"Research Info: {state['research_info']}")
    print(f"Num Steps: {state['num_steps']}")
    return

pt2.add_node("state_printer", state_printer)

## Linking the Nodes

In [18]:
pt1.set_entry_point("categorize_log")

# if categorize log is anamolous, search for research info, otherwise END
pt1.add_conditional_edges("categorize_log", lambda state: state["log_category"] == "'anamolous_log'", {
    True: "research_info_search",
    False: END
})

pt1.add_edge("research_info_search", "draft_explanation_writer")

# if draft requires rewrite, send to analyze draft explanation

pt2.set_entry_point("analyze_draft_explanation")

pt2.add_edge("analyze_draft_explanation", "rewrite_explanation")

pt2.add_edge("rewrite_explanation", "state_printer")

pt2.add_edge("state_printer", END)

In [19]:
# Compile
graph1 = pt1.compile()

graph2 = pt2.compile()

## Demo

In [20]:
if not timestamp:
    timestamp = input("Enter timestamp: ")

nearby_logs = "\n".join(get_nearby_logs(log_data, timestamp))
prev_explanations = ""

for log in log_data.split("\n"):
  inputs = {"initial_log": log, "nearby_logs": nearby_logs, "num_steps": 0, "prev_explanations": prev_explanations}
  state = graph1.invoke(inputs)
  if 'explanation' in state.keys():
    out = state['explanation']
    print(f"Anamolous Log: {log}")
    print(f"Draft Explanation: {out}")

    decision = rewrite_router.invoke({"initial_log": log, "draft_explanation": out})["router_decision"]

    if decision == "rewrite":
      state2 = graph2.invoke({"initial_log": log, "draft_explanation": out, "nearby_logs": nearby_logs, "prev_explanations": prev_explanations, "num_steps" : state['num_steps'], "research_info": state['research_info']})
      out = state2['final_explanation']
    else:
      prev_explanations += out + "\n"
      out = state['explanation']
      print("No need to rewrite")
    print(f"Final Explanation: {out}")




---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
No problems here.
---CATEGORIZING INITIAL LOG---
---SEARCHING FOR RESEARCH INFO---
['Jun 15, 2018 ... ... : EZA2590E listen error from initDsConnection - EDC8112I Operation not supported on socket. (errno2=0x744C7332)', 'Mar 31, 2016 ... Amazingly, nothing in the python socket docs for listen --- socket.listen([backlog]) Enable a server to accept connections. If backlog is\xa0...', 'Apr 15, 2009 ... 5 Answers 5 ... You should consider using Real-time Transport Protocol (aka RTP). The underlying IP protocol used by RTP is UDP, but it has\xa0...', '... settcpimage: Get TCP images rc - EDC8112I Operation not supported on socket. 03 03/22 08:51:01 INFO :..settcpimage: Associate with TCP/IP image name = TCPCS\xa0...', '