In [10]:
from dotenv import load_dotenv
load_dotenv()

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from typing import Literal
from app.state import GraphState

# --- 1. Define the LLM ---
# Chat model that backs the supervisor chain built further down in this cell.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)

# Structured routing decision requested from the supervisor LLM
# (used as the schema for `llm.with_structured_output(Route)`).
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- a docstring would presumably become part of the schema description
# sent to the model; confirm before adding one.
class Route(BaseModel):
    # Name of the step to run next: one of the two agent names or the terminal
    # marker. `run_supervisor` copies this value into `state['next_node']`.
    next: Literal["Threat_Analyst", "Log_Analyst", "end_investigation"]

# --- 2. Engineer the Supervisor's Prompt ---
# Routing instructions for the supervisor. The three placeholders
# ({intel_available}, {log_summary_available}, {investigation_trace}) are
# supplied by `run_supervisor` on every call.
_SUPERVISOR_TEMPLATE = """You are a senior SOC supervisor. Your role is to orchestrate a team of specialist agents.
Based on the investigation state, you must decide the next step.

Decision-making process:
1. If no threat intelligence has been gathered (`intel_available` is 'No'), you must call the `Threat_Analyst`.
2. If threat intelligence is available (`intel_available` is 'Yes') AND logs are present but unanalyzed (`log_summary_available` is 'No'), you must call the `Log_Analyst`.
3. If all analyses are complete, you must choose `end_investigation`.

Here is the current state:
Intel available: {intel_available}
Log Summary available: {log_summary_available}
Trace: {investigation_trace}

What is the next step?"""

supervisor_prompt = ChatPromptTemplate.from_template(_SUPERVISOR_TEMPLATE)

# --- 3. Create the Supervisor Chain ---
# Prompt piped into the LLM, with the reply parsed into a `Route` instance.
supervisor_chain = supervisor_prompt | llm.with_structured_output(Route)

def run_supervisor(state: GraphState) -> GraphState:
    """Ask the supervisor chain which step should run next.

    The state is reduced to 'Yes'/'No' flags for `intel` and `log_summary`
    (based on truthiness) plus the trace entries joined with newlines. The
    chain's `next` value is written to `state['next_node']`.

    Args:
        state: Current investigation state; mutated in place.

    Returns:
        The same `state` object, with `next_node` updated.
    """
    print("---SUPERVISOR---")

    has_intel = "Yes" if state.get("intel") else "No"
    has_log_summary = "Yes" if state.get("log_summary") else "No"
    trace_text = "\n".join(state.get("investigation_trace", []))

    payload = {
        "intel_available": has_intel,
        "log_summary_available": has_log_summary,
        "investigation_trace": trace_text,
        # Also forwarded, although the supervisor template defined alongside
        # this function has no {alert} / {indicator} placeholders.
        "alert": state.get("alert"),
        "indicator": state.get("indicator"),
    }

    decision = supervisor_chain.invoke(payload)
    state["next_node"] = decision.next
    return state

In [11]:
from copy import deepcopy

# Seed state for the walkthrough: one alert, the indicator to look up, and the
# raw log line to analyse.
initial_state = {
    "alert": {"source": "auth_log", "details": "Multiple failed logins from a new IP"},
    "indicator": "198.51.100.10", # A suspicious IP
    "logs": "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",

    # These fields are empty at the start
    "intel": None,
    "investigation_trace": [],
    "next_node": "",
    "log_summary": None
}

# `run_supervisor` writes into the dict it receives and returns that same
# object, so passing `initial_state` directly would make `updated_state` an
# alias of it and every later step would silently rewrite the "initial" state.
# A deep copy (the trace list is nested) keeps `initial_state` pristine.
updated_state = run_supervisor(deepcopy(initial_state))

---SUPERVISOR---


In [12]:
updated_state

{'alert': {'source': 'auth_log',
  'details': 'Multiple failed logins from a new IP'},
 'indicator': '198.51.100.10',
 'logs': "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",
 'intel': None,
 'investigation_trace': [],
 'next_node': 'Threat_Analyst',
 'log_summary': None}

In [13]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain.agents import AgentExecutor, create_tool_calling_agent

from dotenv import load_dotenv
load_dotenv()

from app.tools import virustotal_ip_lookup
from app.state import GraphState
import json

# --- 1. Define the LLM and the Output Structure ---
# Chat model used by both the tool-calling agent and the formatter chain below.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)

# Schema for the formatter chain's reply (`llm.with_structured_output(ThreatIntel)`).
# `run_threat_analyst` stores the resulting instance in `state["intel"]` and
# reads `is_malicious` to build its trace message.
# NOTE(review): the docstring and Field descriptions below are presumably sent
# to the model as part of the schema -- treat them as prompt text, not comments.
class ThreatIntel(BaseModel):
    """Structured output for the Threat Intelligence Analyst agent."""
    summary: str = Field(description="A concise summary of the threat intelligence findings, including reputation and analysis stats.")
    is_malicious: bool = Field(description="The final verdict on whether the indicator is malicious, based on the analysis.")

# --- 2. Create the "Tool User" Agent ---
# Single-purpose agent: its system message tells it to call the VirusTotal
# lookup tool for whatever IP address arrives as `{input}`.
tool_user_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are an assistant that uses tools to gather information. You must use the virustotal_ip_lookup tool for the given IP address.",
    ),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])

# The agent and its executor are each given the same one-tool list.
tool_user_agent = create_tool_calling_agent(
    llm,
    [virustotal_ip_lookup],
    tool_user_prompt,
)
tool_user_executor = AgentExecutor(
    agent=tool_user_agent,
    tools=[virustotal_ip_lookup],
    verbose=True,
)


# --- 3. Create the "Formatter" Chain ---
# Second stage: turns the text passed in as `{raw_data}` into a `ThreatIntel`
# instance. No tools are bound to this chain.
formatter_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """You are a senior cybersecurity analyst. Your task is to interpret raw tool output and provide a structured analysis.
          Based on the provided data, determine if the indicator is malicious and provide a concise summary.
          You must respond in the format of the `ThreatIntel` tool.""",
    ),
    (
        "human",
        "Here is the raw data from the threat intelligence tool:\n\n{raw_data}",
    ),
])

# Structured output is applied to the plain LLM, separate from the tool-calling agent.
formatter_chain = formatter_prompt | llm.with_structured_output(ThreatIntel)


# --- 4. Define the Node that Orchestrates the Two Steps ---
def run_threat_analyst(state: GraphState) -> GraphState: # <-- Returns the whole state
    """Look up the indicator and record a structured threat-intel verdict.

    Two steps are chained:
      1. `tool_user_executor` is invoked with the indicator as its input.
      2. The executor's final `output` text (the agent's answer, not the tool's
         JSON payload itself) is handed to `formatter_chain`, which returns a
         `ThreatIntel` instance.

    Args:
        state: Current investigation state; must contain `indicator`.
            Mutated in place.

    Returns:
        The same `state` object with `intel` set and one conclusion line
        appended to `investigation_trace`.

    Raises:
        KeyError: if `indicator` is missing from the state.
    """
    print("---RUNNING THREAT ANALYST---")
    indicator = state["indicator"]

    tool_response = tool_user_executor.invoke({"input": indicator})
    raw_data_str = tool_response["output"]
    structured_output = formatter_chain.invoke({"raw_data": raw_data_str})

    # Explicitly update the state
    state["intel"] = structured_output
    verdict = "malicious" if structured_output.is_malicious else "benign"
    trace_message = f"Threat Analyst conclusion: The indicator '{indicator}' is {verdict}."
    # `run_supervisor` tolerates a state without a trace list; do the same here
    # instead of raising KeyError after the tool and LLM calls have already run.
    state.setdefault("investigation_trace", []).append(trace_message)

    return state

In [15]:
# Run the threat-analyst step on the state produced by the supervisor, then display it.
# NOTE: `run_threat_analyst` appends to `investigation_trace`, so re-running this
# cell without rebuilding the state adds a duplicate trace entry.
updated_state = run_threat_analyst(updated_state)
updated_state

---RUNNING THREAT ANALYST---


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `virustotal_ip_lookup` with `{'ip_address': '198.51.100.10'}`


[0m[36;1m[1;3m{"ip_address": "198.51.100.10", "reputation": 0, "analysis_stats": {"harmless": 61, "malicious": 0, "suspicious": 0, "undetected": 34}, "is_malicious": false}[0m[32;1m[1;3mThe IP address 198.51.100.10 has a reputation score of 0, indicating that it is not considered malicious. The analysis stats show that 61 out of 95 scanners detected it as harmless, while 0 scanners detected it as malicious, 0 as suspicious, and 34 as undetected.[0m

[1m> Finished chain.[0m


{'alert': {'source': 'auth_log',
  'details': 'Multiple failed logins from a new IP'},
 'indicator': '198.51.100.10',
 'logs': "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",
 'intel': ThreatIntel(summary='The IP address 198.51.100.10 has a reputation score of 0, with 61 out of 95 scanners detecting it as harmless and 0 as malicious.', is_malicious=False),
 'investigation_trace': ["Threat Analyst conclusion: The indicator '198.51.100.10' is benign."],
 'next_node': 'Threat_Analyst',
 'log_summary': None}

In [16]:
# Ask the supervisor again now that `intel` is populated; in the recorded output
# `next_node` changes to 'Log_Analyst'.
updated_state = run_supervisor(updated_state)
updated_state

---SUPERVISOR---


{'alert': {'source': 'auth_log',
  'details': 'Multiple failed logins from a new IP'},
 'indicator': '198.51.100.10',
 'logs': "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",
 'intel': ThreatIntel(summary='The IP address 198.51.100.10 has a reputation score of 0, with 61 out of 95 scanners detecting it as harmless and 0 as malicious.', is_malicious=False),
 'investigation_trace': ["Threat Analyst conclusion: The indicator '198.51.100.10' is benign."],
 'next_node': 'Log_Analyst',
 'log_summary': None}

In [17]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from dotenv import load_dotenv
load_dotenv()

from app.state import GraphState

# Chat model that backs the log-analysis chain defined below.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)

# Schema for the log-analysis chain's reply (`llm.with_structured_output(LogAnalysis)`).
# `run_log_analyst` stores the resulting instance in `state["log_summary"]` and
# reads `contains_anomaly` to build its trace message.
# NOTE(review): the docstring and Field descriptions below are presumably sent
# to the model as part of the schema -- treat them as prompt text, not comments.
class LogAnalysis(BaseModel):
    """Structured Output for the Log Analysis agent."""
    summary: str = Field(description="A summary of the key events found in the logs and an analysis of their significance")
    contains_anomaly: bool = Field(description="The final verdict on whether the logs contain any anomalous or suspicious activity.")

# Prompt for the log analyst: a system message listing the patterns to look for,
# followed by a human message carrying the raw `{logs}` text.
log_analyst_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a senior cybersecurity analyst specializing in log analysis. Your task is to meticulously review the provided logs and identify any suspicious or anomalous activity.

        Look for patterns such as:
        - Multiple failed login attempts followed by a success.
        - Access to sensitive files or directories.
        - Unusual commands being executed.
        - Connections from unexpected IP addresses or ports.

        Summarize the key events and provide a final verdict on whether an anomaly is present.
        You must respond in the format of the `LogAnalysis` tool.""",
        ),
        (
            "human",
            "Please analyze the following logs related to the alert:\n\n{logs}",
        ),
    ]
)

# Prompt piped into the LLM, with the reply parsed into a `LogAnalysis` instance.
log_analyst_chain = log_analyst_prompt | llm.with_structured_output(LogAnalysis)

def run_log_analyst(state: GraphState) -> GraphState: # <-- Returns the whole state
    """Analyse the logs in the state and record a structured verdict.

    When `logs` is present and non-empty, it is sent to `log_analyst_chain` and
    the returned `LogAnalysis` is stored in `state["log_summary"]`.

    When `logs` is missing or empty, no LLM call is made, but a placeholder
    `LogAnalysis` and a trace entry are still recorded. `run_supervisor`
    reports `log_summary_available` purely from whether `log_summary` is set,
    and its prompt carries no information about whether logs exist. Leaving
    the field empty would therefore keep telling the supervisor that log
    analysis is outstanding, and a driver that follows `next_node` could be
    sent back here indefinitely.

    Args:
        state: Current investigation state; mutated in place.

    Returns:
        The same `state` object with `log_summary` set and one conclusion line
        appended to `investigation_trace`.
    """
    print("---RUNNING LOG ANALYST---")
    logs = state.get("logs", "")

    if logs:
        response = log_analyst_chain.invoke({"logs": logs})
        trace_message = f"Log Analyst conclusion: Anomaly detected: {response.contains_anomaly}."
    else:
        # Nothing to analyse: mark the step as done so the supervisor can move on.
        response = LogAnalysis(
            summary="No logs were provided for analysis.",
            contains_anomaly=False,
        )
        trace_message = "Log Analyst conclusion: No logs were provided; analysis skipped."

    # Explicitly update the state
    state["log_summary"] = response
    # `run_supervisor` tolerates a state without a trace list; do the same here
    # instead of raising KeyError after the LLM call has already run.
    state.setdefault("investigation_trace", []).append(trace_message)

    return state

In [18]:
# Run the log-analyst step on the current state, then display it.
# NOTE: `run_log_analyst` appends to `investigation_trace`, so re-running this
# cell without rebuilding the state adds a duplicate trace entry.
updated_state = run_log_analyst(updated_state)
updated_state

---RUNNING LOG ANALYST---


{'alert': {'source': 'auth_log',
  'details': 'Multiple failed logins from a new IP'},
 'indicator': '198.51.100.10',
 'logs': "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",
 'intel': ThreatIntel(summary='The IP address 198.51.100.10 has a reputation score of 0, with 61 out of 95 scanners detecting it as harmless and 0 as malicious.', is_malicious=False),
 'investigation_trace': ["Threat Analyst conclusion: The indicator '198.51.100.10' is benign.",
  'Log Analyst conclusion: Anomaly detected: False.'],
 'next_node': 'Log_Analyst',
 'log_summary': LogAnalysis(summary="The logs show a user 'admin' executing the command 'cat /etc/passwd' from IP 198.51.100.10, which could indicate an attempt to access sensitive information. However, this command is not typically malicious by itself and 'cat /etc/passwd' is a common command for viewing user information. The IP address appears to be a valid IP address, but without more context, it's difficult t

In [19]:
# Final supervisor pass with both `intel` and `log_summary` populated; in the
# recorded output `next_node` becomes 'end_investigation'.
updated_state = run_supervisor(updated_state)
updated_state

---SUPERVISOR---


{'alert': {'source': 'auth_log',
  'details': 'Multiple failed logins from a new IP'},
 'indicator': '198.51.100.10',
 'logs': "[2023-10-27 10:00:04] CMD: User 'admin' executed 'cat /etc/passwd' from IP 198.51.100.10.",
 'intel': ThreatIntel(summary='The IP address 198.51.100.10 has a reputation score of 0, with 61 out of 95 scanners detecting it as harmless and 0 as malicious.', is_malicious=False),
 'investigation_trace': ["Threat Analyst conclusion: The indicator '198.51.100.10' is benign.",
  'Log Analyst conclusion: Anomaly detected: False.'],
 'next_node': 'end_investigation',
 'log_summary': LogAnalysis(summary="The logs show a user 'admin' executing the command 'cat /etc/passwd' from IP 198.51.100.10, which could indicate an attempt to access sensitive information. However, this command is not typically malicious by itself and 'cat /etc/passwd' is a common command for viewing user information. The IP address appears to be a valid IP address, but without more context, it's diffi