# Agent 4: Data Analyzer
**`Data Analyzer`** is an agent focused on data analysis, responsible for extracting valuable information from structured data (such as CSV files exported from databases), supporting two main tasks:

1. **Anomaly Detection**: Identifying potential anomalous events in network security (such as abnormal traffic, unauthorized access, or attack behaviors).
2. **Trend Prediction**: Predicting future security situations based on historical data (such as attack frequency trends, network traffic changes, and potential threat activity).

Working in conjunction with **TimeGPT** (time series analysis tool), **Data Analyzer** can generate intuitive analysis results and return them in visual or data format to assist with network security decision-making.

In [1]:
# load environment variables from the .env file
from dotenv import load_dotenv

load_dotenv()

True

## Define user tools

In [2]:
import os
import json
import pandas as pd
from typing import Any, Optional, Literal, Set, Callable
from nixtla import NixtlaClient


def analyze_data(file_path: str,
                 method: Literal['forecast', 'anomaly_detection'],
                 horizon: Optional[int] = 0,
                 ftSteps: Optional[int] = 0) -> str:
    """
    Analyze a dataset using the specified analysis method (forecasting or anomaly detection).

    This function reads a pre-defined dataset based on the provided `file_name` argument, 
    performs the selected analysis method (`forecast` or `anomaly_detection`), and 
    returns the results in JSON format. If forecasting is selected, it generates 
    predictions over a specified horizon and optionally uses fine-tuning steps. 
    If anomaly detection is selected, it identifies anomalies in the data.

    :param file_name: The name of the dataset to analyze. 

    :param method: The analysis method to use. Must be one of ['forecast', 'anomaly_detection']:
                   - 'forecast': Predicts future values based on historical data.
                   - 'anomaly_detection': Detects anomalies in the data.

    :param horizon: (Optional) The forecast horizon, i.e., the number of future steps to predict.
                    Only applicable when `method='forecast'`. Default is 0.

    :param ftSteps: (Optional) The number of fine-tuning steps to apply during forecasting.
                    Only applicable when `method='forecast'`. Default is 0.

    :return: A JSON string containing the results of the analysis. The structure of the output depends on
             the selected method:
             - For `forecast`, it includes forecasted values and a visualization.
             - For `anomaly_detection`, it includes detected anomalies and a visualization.
             If an error occurs, the returned JSON contains an error message.
    :rtype: str
    """
    
    if os.getenv("TIME_GEN_ENDPOINT") is None or os.getenv("TIME_GEN_KEY") is None:
        if method == 'forecast':
            return "./figures/Intrusion_Attempts_forecast_plot.png"
        else:
            return "./figures/Incident_Detection_Rate_anomalies_plot.png"
    
    nixtla_client = NixtlaClient(
        base_url=os.getenv("TIME_GEN_ENDPOINT"),
        api_key=os.getenv("TIME_GEN_KEY"),
    )

    if file_path is not None:
        file_path = file_path if file_path.endswith(
            'csv') else f'./data/{file_path}.csv'
        file_name = file_path.split('/')[-1].split('.')[0]
        df = pd.read_csv(file_path)

        # Initialize variables
        horizon_int = None
        ftSteps_int = None

        try:
            horizon_int = int(horizon)
        except (ValueError, TypeError) as e:
            return f"Error converting 'horizon': {e}"

        try:
            ftSteps_int = int(ftSteps)
        except (ValueError, TypeError) as e:
            return f"Error converting 'ftSteps': {e}"

        if ftSteps is None:
            ftSteps_int = 0

        try:
            if method == 'forecast':
                forecast_df = nixtla_client.forecast(
                    df=df,
                    h=horizon_int,
                    finetune_steps=ftSteps_int,
                    time_col="timestamp",
                    target_col="value",
                )

                fig = nixtla_client.plot(
                    df=df, forecasts_df=forecast_df, time_col="timestamp", target_col="value"
                )

                ax = fig.axes[0]
                ax.legend(["Actual Values", "Forecasted Values"],
                          loc="upper right", bbox_to_anchor=(1.14, 1), borderaxespad=0)
                ax.set_title(f"{file_name}: Forecasted vs Actual Values", fontsize=18,
                             fontweight="bold", color="teal", pad=15)

                fig_name = f"./figures/{file_name}_forecast_plot.png"
                fig.savefig(fig_name, dpi=300)

                return fig_name
            elif method == "anomaly_detection":
                anomalies_df = nixtla_client.detect_anomalies(
                    df,
                    time_col="timestamp",
                    target_col="value",
                    freq="D",
                )
                anomalies_df = anomalies_df.rename(columns={
                    "TimeGEN": "TimeGPT",
                    "TimeGEN-lo-99": "TimeGPT-lo-99",
                    "TimeGEN-hi-99": "TimeGPT-hi-99"
                })

                fig = nixtla_client.plot(
                    df, anomalies_df, time_col="timestamp", target_col="value")

                ax = fig.axes[0]
                ax.legend(["Actual Values", "TimeGPT", "TimeGPT_level_99", "TimeGPT_anomalies_level_99"],
                          loc="upper right", bbox_to_anchor=(1.21, 1), borderaxespad=0)
                ax.set_title(f"{file_name}: Anomalies on Actual Values", fontsize=18,
                             fontweight="bold", color="teal", pad=15)

                fig_name = f"./figures/{file_name}_anomalies_plot.png"
                fig.savefig(fig_name, dpi=300)

                return fig_name
            else:
                return "Invalid method specified."
        except Exception as e:
            return json.dumps({"error": str(e)})
    else:
        return "No data file provided."



In [None]:
analyze_data("./data/intrusion_attempts_20250105_095639.csv", 'forecast', horizon=10)

In [None]:
analyze_data("./data/Incident Detection Rate.csv", 'anomaly_detection')

## Create Agent

In [5]:
import os
import logging
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.projects.models import FunctionTool


project_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(), conn_str=os.environ["AIPROJECT_CONNECTION_STRING"]
)

logging.getLogger('azure.core.pipeline.policies.http_logging_policy').setLevel(logging.WARNING)

INFO:azure.identity._credentials.environment:No environment configuration found.
INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS


In [6]:
from typing import Any, Set, Callable
from azure.ai.projects.models import FunctionTool


user_functions: Set[Callable[..., Any]] = {analyze_data}
functions = FunctionTool(functions=user_functions)
print(functions.definitions)

[{'type': 'function', 'function': {'name': 'analyze_data', 'description': 'Analyze a dataset using the specified analysis method (forecasting or anomaly detection).', 'parameters': {'type': 'object', 'properties': {'file_path': {'type': 'string', 'description': 'No description'}, 'method': {'type': 'string', 'description': "The analysis method to use. Must be one of ['forecast', 'anomaly_detection']:"}, 'horizon': {'type': ['integer', 'null'], 'description': '(Optional) The forecast horizon, i.e., the number of future steps to predict.'}, 'ftSteps': {'type': ['integer', 'null'], 'description': '(Optional) The number of fine-tuning steps to apply during forecasting.'}}, 'required': ['file_path', 'method']}}}]


In [7]:
data_analyzer = project_client.agents.create_agent(
    model=os.environ["CHAT_MODEL"],
    name="data_analyzer",
    description="An agent that analyzes data using forecasting or anomaly detection.",
    instructions="Hello, you are helpful assistant.",
    tools=functions.definitions,
    # Parameters
    temperature=0.7,
    top_p=0.95,
    # Metadata
    metadata={"group": "internet_threat_analysis"},
)

print(f"Created agent, agent ID: {data_analyzer.id}")

INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureCliCredential


Created agent, agent ID: asst_8ac0MkOHkYgiVUkA1EDSmyUV


In [None]:
import time
from azure.ai.projects.models import RequiredFunctionToolCall, SubmitToolOutputsAction, ToolOutput


thread = project_client.agents.create_thread()
print(f"Created thread, ID: {thread.id}")

# Create message to thread
message_1 = project_client.agents.create_message(
    thread_id=thread.id, role="assistant", content="""The intrusion attempts data has been successfully fetched and saved as a CSV file. You can find it at the following path: ./data/intrusion_attempts_20250116_110658.csv.
    
    The incident detection data has been successfully fetched and saved as a CSV file. You can find it at the following path: ./data/incident_detection_rate_20250116_110539.csv
    """
)
message_2 = project_client.agents.create_message(
    thread_id=thread.id, role="user", content="Hello, create a 10-day forecast using the intrusion attempts data. And detect anomalies in the incident detection data."
)

# Create and process assistant run in thread with tools
run = project_client.agents.create_run(
    thread_id=thread.id, assistant_id=data_analyzer.id)
print(f"Created run, ID: {run.id}")

while run.status in ["queued", "in_progress", "requires_action"]:
    time.sleep(1)
    run = project_client.agents.get_run(thread_id=thread.id, run_id=run.id)

    if run.status == "requires_action" and isinstance(run.required_action, SubmitToolOutputsAction):
        tool_calls = run.required_action.submit_tool_outputs.tool_calls
        if not tool_calls:
            print("No tool calls provided - cancelling run")
            project_client.agents.cancel_run(
                thread_id=thread.id, run_id=run.id)
            break

        tool_outputs = []
        for tool_call in tool_calls:
            if isinstance(tool_call, RequiredFunctionToolCall):
                try:
                    print(f"Executing tool call: {tool_call}")
                    output = functions.execute(tool_call)
                    tool_outputs.append(
                        ToolOutput(
                            tool_call_id=tool_call.id,
                            output=output,
                        )
                    )
                except Exception as e:
                    print(f"Error executing tool_call {tool_call.id}: {e}")

        print(f"Tool outputs: {tool_outputs}")
        if tool_outputs:
            project_client.agents.submit_tool_outputs_to_run(
                thread_id=thread.id, run_id=run.id, tool_outputs=tool_outputs
            )

    print(f"Current run status: {run.status}")

print(f"Run completed with status: {run.status}")

In [None]:
from IPython.display import Markdown, display
import helper

messages = project_client.agents.list_messages(thread_id=thread.id)

display(Markdown(helper.get_conversation_md(messages)))