In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path

# Add the parent directory to the system path
sys.path.append(str(Path.cwd().parent))

In [3]:
# Configure the log file path
LOG_FILE_PATH = Path("/Users/suyog/personal/sreAgent/output/logs.log")

In [4]:
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Import custom tools
from tools.oncall_employees import GetOncallEmployeesTool
from tools.jira import CreateJiraTicketTool
from tools.file import FilteredLogReaderTool
from tools.stack_exchange import StackExchangeTool


def setup_agent():
    """Set up the ReAct agent with the necessary tools."""
    llm = ChatOpenAI(model="gpt-4o-mini")

    # Load ReAct prompt
    prompt = hub.pull("hwchase17/react")

    # Initialize tools
    tools = [
        FilteredLogReaderTool(log_file_path=LOG_FILE_PATH),
        GetOncallEmployeesTool(),
        CreateJiraTicketTool(),
    ]

    # Create the agent
    agent = create_react_agent(llm, tools, prompt)

    # Create the agent executor
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
        max_iterations=10,
    )

    return agent_executor


agent_executor = setup_agent()

[32m2025-05-02 16:30:13.752[0m | [34m[1mDEBUG   [0m | [36mtools.file[0m:[36m__init__[0m:[36m40[0m - [34m[1mFilteredLogReaderTool initialized with log path: /Users/suyog/personal/sreAgent/output/logs.log[0m


In [5]:
prompt = (
    "Read logs from 2025-05-01 17:44:43 to 2025-05-01 17:44:44 and check for any errors or critical issues. "
    "Use the filtered_log_reader tool with these exact timestamps to only look at new logs since the last check."
    "You need to then idenify the potential cause and the possible solution for each of the error you saw."
    "After idenifying the potential cause and possible solution use get_oncall_employees tool to find filter out on-call employees best suited to handle each error"
    "Finally use create_jira_ticket to create appropriate tickets and assign it to the right employee"
)
response = agent_executor.invoke({"input": prompt})



[1m> Entering new AgentExecutor chain...[0m


[32m2025-05-02 16:30:16.160[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m19[0m - [1mParser recived input of type -> <class 'str'> 
 value -> {'from_time': '2025-05-01 17:44:43', 'to_time': '2025-05-01 17:44:44'}  
[0m
[32m2025-05-02 16:30:16.161[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m28[0m - [1mChecking for pattern from_time='([^']+)',\s*to_time='([^']+)'[0m
[32m2025-05-02 16:30:16.162[0m | [1mINFO    [0m | [36mtools.file[0m:[36m_run[0m:[36m68[0m - [1mReading logs with filters - from_time: 2025-05-01 17:44:43, to_time: 2025-05-01 17:44:44[0m
[32m2025-05-02 16:30:16.163[0m | [34m[1mDEBUG   [0m | [36mtools.file[0m:[36m_run[0m:[36m84[0m - [34m[1mParsed from_time to 2025-05-01 17:44:43[0m
[32m2025-05-02 16:30:16.164[0m | [34m[1mDEBUG   [0m | [36mtools.file[0m:[36m_run[0m:[36m92[0m - [34m[1mParsed to_time to 2025-05-01 17:44:44[0m
[32m2025-05-02 16:30:16.164[0m | [34m[1mDEBUG   

[32;1m[1;3mTo address the request, I will first utilize the `filtered_log_reader` tool to obtain the log entries between the specified timestamps. I will look for any errors or critical issues within those logs once retrieved.

Action: filtered_log_reader  
Action Input: {'from_time': '2025-05-01 17:44:43', 'to_time': '2025-05-01 17:44:44'}  
[0m[36;1m[1;3m[2025-05-01 17:44:43.055] [ERROR] [RequestID: 141e496c-6f90-4e69-9cd7-10fd536a631c] [host: suyogs-MacBook-Air.local] Unhandled exception during /api/auth request processing: ConnectionResetError: Connection reset by peer while fetching data from cache server
Traceback (most recent call last):
  File "/app/banking/api/controllers.py", line 83, in process_request
    response = await service.execute_transaction(transaction_data)
  File "/app/banking/service/transaction_service.py", line 191, in execute_transaction
    result = await self.repository.commit(validated_data)
  File "/app/banking/repository/transaction_repo.py", line 1

[32m2025-05-02 16:30:20.937[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m19[0m - [1mParser recived input of type -> <class 'str'> 
 value -> {'team_name': 'Infrastructure'}  
[0m
[32m2025-05-02 16:30:20.938[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m28[0m - [1mChecking for pattern team_name='([^']+)'[0m


[32;1m[1;3mThe retrieved logs show two errors that need to be addressed:

1. **Error 1** (ConnectionResetError):
   - **Description**: An unhandled exception occurred during the `/api/auth` request processing, indicating that there was a "Connection reset by peer while fetching data from the cache server."
   - **Potential Cause**: This could be caused by the cache server restarting or becoming temporarily unavailable.
   - **Possible Solution**: Check the status of the cache server and ensure it is operational. If necessary, restart the cache server.

2. **Error 2** (ConnectionRefusedError):
   - **Description**: An unhandled exception occurred during the `/api/payments` request processing, indicating a failure to connect to the primary database at the specified IP address.
   - **Potential Cause**: The primary database may be down, or there may be network issues preventing access.
   - **Possible Solution**: Verify the status of the database server and check network connectivity to

[32m2025-05-02 16:30:22.724[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m19[0m - [1mParser recived input of type -> <class 'str'> 
 value -> {'team_name': 'Reliability'}  
[0m
[32m2025-05-02 16:30:22.725[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m28[0m - [1mChecking for pattern team_name='([^']+)'[0m


[32;1m[1;3mWe have identified the potential on-call employees from the Infrastructure team who can assist with the cache server issue.

Next, I will check for the on-call employees suited to address the database connection issue. This typically falls under either Database or a similar team, so I will retrieve the on-call employees for that team as well.

Action: get_oncall_employees  
Action Input: {'team_name': 'Reliability'}  
[0m[33;1m[1;3mCurrent on-call employees:

Name: Carol Davis
Email: carol.davis@example.com
Role: SRE Manager
Team: Reliability

[0m

[32m2025-05-02 16:30:28.051[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m19[0m - [1mParser recived input of type -> <class 'str'> 
 value -> {'summary': 'Connection Reset Error during API Auth Requests', 'description': 'There was an unhandled exception during the `/api/auth` request processing. The error message indicates a connection reset by the peer while fetching data from the cache server. Potential causes could be related to the cache server\'s status. Please investigate and ensure the cache server is operational.', 'assignee': 'alice.johnson@example.com'}  
[0m
[32m2025-05-02 16:30:28.052[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m28[0m - [1mChecking for pattern summary='([^']+)',\s*description='([^']+)',\s*issue_type='([^']+)',\s*assignee='([^']+)'[0m
[32m2025-05-02 16:30:28.053[0m | [34m[1mDEBUG   [0m | [36mtools.jira[0m:[36m_run[0m:[36m66[0m - [34m[1mConnecting to JIRA at https://suyoginusa.atlassi

[32;1m[1;3mNow that I have the on-call personnel for both issues, I can construct Jira tickets for each error and assign them accordingly.

**For the first error (ConnectionResetError)**:
- **Summary**: Connection Reset Error during API Auth Requests
- **Description**: There was an unhandled exception during the `/api/auth` request processing. The error message indicates a connection reset by the peer while fetching data from the cache server. Potential causes could be related to the cache server's status. Please investigate and ensure the cache server is operational.

Assigned to: Alice Johnson (Senior SRE, Infrastructure)

**For the second error (ConnectionRefusedError)**:
- **Summary**: Connection Refused Error during API Payments Requests
- **Description**: An unhandled exception occurred during the `/api/payments` request processing, indicating a failure to connect to the primary database at IP: 10.45.2.8:5432. Potential causes include the database being down or network issues. 

[32m2025-05-02 16:30:31.077[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m19[0m - [1mParser recived input of type -> <class 'str'> 
 value -> {'summary': 'Connection Refused Error during API Payments Requests', 'description': 'An unhandled exception occurred during the `/api/payments` request processing, indicating a failure to connect to the primary database at IP: 10.45.2.8:5432. Potential causes include the database being down or network issues. Please verify the database status and check network connectivity.', 'assignee': 'carol.davis@example.com'}  
[0m
[32m2025-05-02 16:30:31.077[0m | [1mINFO    [0m | [36mtools.base[0m:[36m_input_parser[0m:[36m28[0m - [1mChecking for pattern summary='([^']+)',\s*description='([^']+)',\s*issue_type='([^']+)',\s*assignee='([^']+)'[0m
[32m2025-05-02 16:30:31.078[0m | [34m[1mDEBUG   [0m | [36mtools.jira[0m:[36m_run[0m:[36m66[0m - [34m[1mConnecting to JIRA at https://suyoginusa.atlassian.net wit

[32;1m[1;3mNow, I will create the second Jira ticket related to the database connection issue.

Action: create_jira_ticket  
Action Input: {'summary': 'Connection Refused Error during API Payments Requests', 'description': 'An unhandled exception occurred during the `/api/payments` request processing, indicating a failure to connect to the primary database at IP: 10.45.2.8:5432. Potential causes include the database being down or network issues. Please verify the database status and check network connectivity.', 'assignee': 'carol.davis@example.com'}  
[0m[38;5;200m[1;3mSuccessfully created Jira ticket: CRM-17. View it here: https://suyoginusa.atlassian.net/browse/CRM-17[0m[32;1m[1;3mI have successfully created the required Jira tickets for the errors identified in the logs.

Final Answer: 
1. **Jira Ticket for Connection Reset Error**: 
   - Summary: Connection Reset Error during API Auth Requests
   - Description: There was an unhandled exception during the `/api/auth` reques

In [6]:
response

{'input': 'Read logs from 2025-05-01 17:44:43 to 2025-05-01 17:44:44 and check for any errors or critical issues. Use the filtered_log_reader tool with these exact timestamps to only look at new logs since the last check.You need to then idenify the potential cause and the possible solution for each of the error you saw.After idenifying the potential cause and possible solution use get_oncall_employees tool to find filter out on-call employees best suited to handle each errorFinally use create_jira_ticket to create appropriate tickets and assign it to the right employee',
 'output': "1. **Jira Ticket for Connection Reset Error**: \n   - Summary: Connection Reset Error during API Auth Requests\n   - Description: There was an unhandled exception during the `/api/auth` request processing. The error message indicates a connection reset by the peer while fetching data from the cache server. Potential causes could be related to the cache server's status. Please investigate and ensure the cac