## Setup

In [70]:
import os
from openai import AzureOpenAI
import argparse
import sys
import json
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
import csv

#Load environment variables
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
#instatiate Azure OpenAI client
client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version=os.getenv("AZURE_OPENAI_API_VERSION")
)

model = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")


## Quick connection test to Azure OpenAI

In [3]:
#Test connection
response = client.chat.completions.create(
    model=model, # model = "deployment_name".
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
        {"role": "user", "content": "Do other Azure AI services support this too?"}
    ]
)

print(response.choices[0].message.content)

Yes, several other Azure AI services support **Customer Managed Keys (CMKs)**, which allow you to have greater control over the encryption keys used to protect your data. CMKs are typically stored and managed in **Azure Key Vault** or **Azure Managed HSM (Hardware Security Module)**. Below are some notable Azure AI services that also support CMKs:

### 1. **Azure Cognitive Search**
   - Azure Cognitive Search supports CMKs, allowing you to encrypt your indexing and storage data using keys you control in Azure Key Vault.
   - With CMKs, you can revoke or rotate keys, helping you maintain compliance with your organization's security policies.

### 2. **Azure Machine Learning (AML)**
   - Azure Machine Learning supports customer-managed keys to encrypt data used in experiments, datasets, models, and other workspace resources.
   - Your keys are stored in Azure Key Vault, and integration with AML ensures your data is protected at rest.

### 3. **Azure Cognitive Services**
   - Many Azure C

In [4]:
# Configure environment variables for Azure Cognitive Search
search_service_endpoint = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
search_index_name = os.getenv("AZURE_AI_SEARCH_INDEX")
search_api_key = os.getenv("AZURE_AI_SEARCH_API_KEY")
credential = AzureKeyCredential("AZURE_AI_SEARCH_API_KEY")

In [5]:
# Initialize the Azure Cognitive Search client
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=search_index_name,
    credential=AzureKeyCredential(search_api_key)
)

## Testing of function calling

In [49]:
system_message = """Assistant supports users to determine whether a firewall rule is risky or not.
You have access to an Azure Cognitive Search index with a list of ports and IP ranges that are classified as risky. You can search for entries by "ip_ranges"
You are designed to be an interactive assistant, so you can ask users clarifying questions to help them determine whether the firewall rule is risky. It's better to give more detailed queries to the search index rather than vague one.
"""

messages = [{"role": "system", "content": system_message},
            {"role": "user", "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, port is 20, protocol is tcp"}]

#"role": "user", "content": "Help me find out whether this firewall rule is risky or not: 193.163.125.240"
#"role": "user", "content": "Help me find out whether this firewall rule is risky or not: ip-range is 193.163.125.240, port is 20"
#"role": "user", "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, and port is 20"
#"role": "user", "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, port is 20, protocol is tcp"

tools = [
    {
        "type": "function",
        "function": {
            "name": "query_risky_ip_ranges",
            "description": "Retrieve risky IP ranges from Azure AI Search index",
            "parameters": {
                "type": "object",
                "properties": {
                    "ip_range": {
                        "type": "string",
                        "description": "The ip-range from the firewall rule request to search for in the risky IP range index",
                    },
                },
                "required": ["ip_range"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "query_risky_ports",
            "description": "Retrieve risky ports from Azure AI Search index",
            "parameters": {
                "type": "object",
                "properties": {
                    "port": {
                        "type": "string",
                        "description": "The port number from the firewall rule request to search for in the risky ports index",
                    },
                    "protocol": {
                        "type": "string",
                        "description": "The protocol from the firewall rule request to search for in the risky ports index",
                    },
                },
                "required": ["port"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "query_firewall_rule_public_risk",
            "description": "Takes source IP, destination IP and port as input and returns whether there is any public risk information associated with the firewall rule",
            "parameters": {
                "type": "object",
                "properties": {
                    "source_ip": {
                        "type": "string",
                        "description": "The source ip-range from the firewall rule request to search for in the risky IP range index",
                    },
                    "destination_ip": {
                        "type": "string",
                        "description": "The destination ip-range from the firewall rule request to search for in the risky IP range index",
                    },
                    "port": {
                        "type": "string",
                        "description": "The port number from the firewall rule request to search for in the risky ports index",
                    },
                    "protocol": {
                        "type": "string",
                        "description": "The protocol from the firewall rule request to search for in the risky ports index",
                    },
                },
                "required": ["source_ip","destination_ip","port", "protocol"],
            },
        },
    },
]

response = client.chat.completions.create(
    model=model,
    messages=messages,
    tools=tools,
    temperature=0.2,
    tool_choice="auto",
)

print(response.choices[0].message)

ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_WsopWUxrF9bxdH8pc8lmNNKX', function=Function(arguments='{"source_ip":"193.163.125.240","destination_ip":"193.163.125.245","port":"20","protocol":"tcp"}', name='query_firewall_rule_public_risk'), type='function')])


## Function Definitions

In [None]:
def query_risky_ip_ranges(ip_range):
    results = search_client.search(
        search_text=ip_range,  # Perform a keyword search using the provided IP range
        query_type="full",  # Use simple query type for keyword search
        search_fields=["ip_ranges"],  # Field to search for the IP range
        select=["risk_id", "description", "cvss_score", "ports", "protocols", "ip_ranges", "last_updated"],  # Select relevant fields
        top=1  # Limit to top 1 results
    )

    top_result = next(iter(results), None)
    if top_result:
        return json.dumps({
            "risk_id": top_result["risk_id"],
            "description": top_result["description"],
            "cvss_score": top_result["cvss_score"],
            "ports": top_result["ports"],
            "protocols": top_result["protocols"],
            "ip_ranges": top_result["ip_ranges"],
            "last_updated": top_result["last_updated"]
        }, indent=2)

    return "No risky IP range found for this input."

In [43]:
def query_risky_ports(port, protocol):
    results = search_client.search(
        search_text="*",  # Perform a keyword search using the provided port
        filter=f"ports eq '{port}' and protocols eq '{protocol}'",  # Filter by the specified port and protocol 
        query_type="full",  # Use simple query type for keyword search
        select=["risk_id", "description", "cvss_score", "ports", "protocols", "ip_ranges", "last_updated"],  # Select relevant fields
        top=1  # Limit to top 1 results
    )

    top_result = next(iter(results), None)
    if top_result:
        return json.dumps({
            "risk_id": top_result["risk_id"],
            "description": top_result["description"],
            "cvss_score": top_result["cvss_score"],
            "ports": top_result["ports"],
            "protocols": top_result["protocols"],
            "ip_ranges": top_result["ip_ranges"],
            "last_updated": top_result["last_updated"]
        }, indent=2)

    return "No risky port found for this input."

In [44]:
def query_firewall_rule_public_risk(source_ip, destination_ip, port, protocol):
    source_ip_risk = query_risky_ip_ranges(source_ip)
    destination_ip_risk = query_risky_ip_ranges(destination_ip)
    port_risk = query_risky_ports(port, protocol)
    #todo: add protocol
    return json.dumps(
        {
        "source_ip": source_ip_risk,
        "destination_ip": destination_ip_risk,
        "port": port_risk
    }, indent=2)

In [47]:
test_firewall_rule_public_risk = query_firewall_rule_public_risk("193.163.125.241","193.163.125.245", "20", "tcp")
print(test_firewall_rule_public_risk)

{
  "source_ip": "{\n  \"risk_id\": \"R401\",\n  \"description\": \"Reported abusive IP\",\n  \"cvss_score\": \"10.0\",\n  \"ports\": null,\n  \"protocols\": null,\n  \"ip_ranges\": \"193.163.125.241\",\n  \"last_updated\": \"2025-03-03T00:00:00Z\"\n}",
  "destination_ip": "No risky IP range found for this input.",
  "port": "{\n  \"risk_id\": \"R002\",\n  \"description\": \"FTP Data- Unencrypted file transfer\",\n  \"cvss_score\": \"5.0\",\n  \"ports\": \"20\",\n  \"protocols\": \"tcp\",\n  \"ip_ranges\": \"0.0.0.0/0\",\n  \"last_updated\": \"2025-03-03T00:00:00Z\"\n}"
}


In [None]:
def query_public_risk_data(ip_range, port):
    # Search for the IP range
    ip_range_result = search_client.search(
        search_text=ip_range,
        query_type="full",
        search_fields=["ip_ranges"],
        select=["risk_id", "description", "cvss_score", "ports", "protocols", "ip_ranges", "last_updated"],
        top=1
    )
    ip_range_info = next(iter(ip_range_result), None)
    ip_range_risk = "No risky IP range found for this input."
    if ip_range_info:
        ip_range_risk = {
            "risk_id": ip_range_info["risk_id"],
            "description": ip_range_info["description"],
            "cvss_score": ip_range_info["cvss_score"],
            "ports": ip_range_info["ports"],
            "protocols": ip_range_info["protocols"],
            "ip_ranges": ip_range_info["ip_ranges"],
            "last_updated": ip_range_info["last_updated"]
        }

    # Search for the port
    port_result = search_client.search(
        search_text=str(port),
        query_type="full",
        search_fields=["ports"],
        select=["risk_id", "description", "cvss_score", "ports", "protocols", "ip_ranges", "last_updated"],
        top=1
    )
    port_info = next(iter(port_result), None)
    port_risk = "No risky port found for this input."
    if port_info:
        port_risk = {
            "risk_id": port_info["risk_id"],
            "description": port_info["description"],
            "cvss_score": port_info["cvss_score"],
            "ports": port_info["ports"],
            "protocols": port_info["protocols"],
            "ip_ranges": port_info["ip_ranges"],
            "last_updated": port_info["last_updated"]
        }

    # Combine results
    return {
        "ip_range_risk": ip_range_risk,
        "port_risk": port_risk
    }

In [37]:
test_public_risk = query_public_risk_data("193.163.125.241","20")
print(test_public_risk)

{'ip_range_risk': {'risk_id': 'R401', 'description': 'Reported abusive IP', 'cvss_score': '10.0', 'ports': None, 'protocols': None, 'ip_ranges': '193.163.125.241', 'last_updated': '2025-03-03T00:00:00Z'}, 'port_risk': {'risk_id': 'R002', 'description': 'FTP Data- Unencrypted file transfer', 'cvss_score': '5.0', 'ports': '20', 'protocols': 'tcp', 'ip_ranges': '0.0.0.0/0', 'last_updated': '2025-03-03T00:00:00Z'}}


In [30]:
test_ports_risky = query_risky_ports("20")
print(test_ports_risky)


{
  "risk_id": "R002",
  "description": "FTP Data- Unencrypted file transfer",
  "cvss_score": "5.0",
  "ports": "20",
  "protocols": "tcp",
  "ip_ranges": "0.0.0.0/0",
  "last_updated": "2025-03-03T00:00:00Z"
}


In [None]:
test_ipranges_risky = query_risky_ip_ranges("193.163.125.241")
print(test_ipranges_risky)

{
  "risk_id": "R401",
  "description": "Reported abusive IP",
  "cvss_score": "10.0",
  "ports": null,
  "protocols": null,
  "ip_ranges": "193.163.125.241",
  "last_updated": "2025-03-03T00:00:00Z"
}


In [17]:
test_ipranges_unrisky = query_risky_ip_ranges("193.163.125.245")
print(test_ipranges_unrisky)

No risky IP range found for this input.


In [16]:
test_ports_unrisky = query_risky_ports("145")
print(test_ports_unrisky)

No risky port found for this input.


## Conversation Handling

In [None]:
# With debug - optimized version
def run_conversation(messages, tools, available_functions, debug=False):
    def log(*args):
        if debug:
            print(*args)

    # Step 1: send the conversation and available functions to GPT
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",
        temperature=0.2,
    )
    response_message = response.choices[0].message

    # Step 2: check if the model wants to call a function
    if response_message.tool_calls:
        tool_call = response_message.tool_calls[0]
        function_name = tool_call.function.name
        log("Recommended Function Call:", tool_call)

        # Step 3: validate and execute the function
        if function_name not in available_functions:
            return f"Function '{function_name}' not found in available_functions."

        function_to_call = available_functions[function_name]

        try:
            function_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError as e:
            return f"Failed to parse function arguments: {e}"

        try:
            function_response = function_to_call(**function_args)
        except Exception as e:
            return f"Error while calling '{function_name}': {e}"

        log("Function Output:", function_response)

        # Step 4: add tool call + function response to messages
        messages.append(
            {
                "role": response_message.role,
                "function_call": {
                    "name": response_message.tool_calls[0].function.name,
                    "arguments": response_message.tool_calls[0].function.arguments,
                },
                "content": None,
            }
        )

        # adding function response to messages
        messages.append(
            {
                "role": "function",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response

        log("Messages passed in second request:")
        if debug:
            for msg in messages:
                print(msg)

        second_response = client.chat.completions.create(
            messages=messages,
            model=model,
        )  # get a new response from GPT where it can see the function response

        return second_response
    else:
        return response

In [None]:
# without debug, everything is printed on console
def run_conversation(messages, tools, available_functions):

    # Step 1: send the conversation and available functions to GPT
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",
        temperature=0.2,
    )
    response_message = response.choices[0].message

    # Step 2: check if the model wants to call a function
    if response_message.tool_calls:
        print("Recommended Function call:")
        print(response_message.tool_calls[0])
        print()

        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        function_name = response_message.tool_calls[0].function.name

        # verify function exists
        if function_name not in available_functions:
            return "Function " + function_name + " does not exist"
        function_to_call = available_functions[function_name]

        try:
            function_args = json.loads(response_message.tool_calls[0].function.arguments)
        except json.JSONDecodeError as e:
            return f"Failed to parse function arguments: {e}"

        try:
            function_response = function_to_call(**function_args)
        except Exception as e:
            return f"Error while calling '{function_name}': {e}"

        #function_args = json.loads(response_message.tool_calls[0].function.arguments)
        #function_response = function_to_call(**function_args)

        print("Output of function call:")
        print(function_response)
        print()

        # Step 4: send the info on the function call and function response to the model

        # adding assistant response to messages
        messages.append(
            {
                "role": response_message.role,
                "function_call": {
                    "name": response_message.tool_calls[0].function.name,
                    "arguments": response_message.tool_calls[0].function.arguments,
                },
                "content": None,
            }
        )

        # adding function response to messages
        messages.append(
            {
                "role": "function",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response

        print("Messages in second request:")
        for message in messages:
            print(message)
        print()

        second_response = client.chat.completions.create(
            messages=messages,
            model=model,
        )  # get a new response from GPT where it can see the function response

        return second_response
    else:
        return response

## System Message experiments

In [80]:
system_message = """
You are a security risk assessment assistant that helps users evaluate whether a given Azure Firewall rule is risky. You interact via natural language and use tools when needed.

You have access to an Azure Cognitive Search index containing public risk data about IP ranges and ports. You can perform keyword-based searches using the following functions:
- `query_risky_ip_ranges(ip_range)`: use this when only the IP address or IP range is provided.
- `query_risky_ports(port, protocol)`: use this when only the port and protocol are provided.
- `query_public_risk_data(ip_range, port, protocol)`: use this when all sourceIp, destination IP, port and protocol are provided.

Your goals:
- Analyze firewall rules based on available data.
- When risky entries are found, use the CVSS score and descriptions to assess severity.
- If no data is found, ask the user for clarification or more detail.

Risk classification based on CVSS:
- `High Risk`: score ≥ 7.0
- `Medium Risk`: 4.0 ≤ score < 7.0
- `Low Risk`: score < 4.0

When forming your assessment:
- Combine the IP and port risk data (if both are available).
- If data is incomplete, explain assumptions or ask follow-up questions.
- Estimate your confidence in the result as a percentage (0–100), based on data quality and completeness.

Respond with a JSON object containing:
{
  "risk_level": "High" | "Medium" | "Low",
  "description": "<brief explanation of why this rule is risky or safe>",
  "recommendation": "<action to take, e.g. 'Block this rule', 'Monitor this rule', 'Allow this rule'>",
  "cvss_score": "<numeric value or 'N/A'>",
  "confidence_score": "<percentage (0–100)>"
}
"""

### tested system message

In [63]:
system_message = """
You are a security risk assessment assistant that helps users evaluate whether an Azure Firewall rule is risky. You interact in natural language and use available tools when needed.

You have access to an Azure Cognitive Search index containing public risk data about IP ranges and ports. Use the appropriate function depending on what information is provided:
- `query_risky_ip_ranges(ip_range)`: use this when only the IP address or IP range is provided.
- `query_risky_ports(port, protocol)`: use this when only the port and protocol are provided.
- `query_public_risk_data(source_ip, destination_ip, port, protocol)`: use this when all sourceIp, destination IP, port and protocol are provided.

Your goals:
- Evaluate firewall rules using available data.
- Use CVSS score and descriptions to assess risk when available.
- If no data is found or input is incomplete, ask follow-up questions or make an informed estimate.
- Assign a confidence score (0–100%) based on how complete and reliable the information is.

Risk classification based on CVSS:
- `High Risk`: score ≥ 7.0
- `Medium Risk`: 4.0 ≤ score < 7.0
- `Low Risk`: score < 4.0

Return your output in the form of a markdown table with the following columns:

| Source IP | Destination IP | Port | Protocol | Risk Level | CVSS Score | Description | Recommendation | Confidence Score |
|-----------|----------------|------|----------|------------|-------------|-------------|----------------|------------------|

- Populate each column with your findings.
- Include all user-provided input fields (IP, port, protocol) in the table.
- Risk Level should be based on CVSS or logical inference.
- Recommendation should be one of: "Block this rule", "Monitor this rule", or "Allow this rule".
- If CVSS score is unavailable, use "N/A".

Only output the final markdown table. Do not include explanations, summaries, or any additional text.
"""

## Test with a few chat inputs

In [81]:
messages = [
    {"role": "system", "content": system_message},
    {
        "role": "user",
        "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, port is 20, protocol is tcp",
    },
]

#testing for ip-range: "content": "Help me find out whether this firewall rule is risky or not: 193.163.125.240"
#testing for port: "content": "Help me find out whether this firewall rule is risky or not: port is 20"
#testing for port and ip-range:"content": "Help me find out whether this firewall rule is risky or not: ip-range is 193.163.125.240, port is 20"
#testing for all together: "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, and port is 20"
#testing for all together: "content": "Help me find out whether this firewall rule is risky or not: source-ip is 193.163.125.240, destination-ip is 193.163.125.245, port is 20, protocol is tcp"


available_functions = {
    "query_risky_ip_ranges": query_risky_ip_ranges,
    "query_risky_ports": query_risky_ports,
    "query_firewall_rule_public_risk": query_firewall_rule_public_risk,
}

result = run_conversation(messages, tools, available_functions, debug=False)

print("Final response:")
if isinstance(result, str):
    print(result)  # It's an error message or string content
else:
    print(result.choices[0].message.content)  # It's a ChatCompletionMessage object
#print(result.choices[0].message.content)

Final response:
Error while calling 'query_firewall_rule_public_risk': ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


## Test with existing firewall rules
ToDo:

read CSV file
go through each line
add risk assessment to it