# Content Safety Evaluations

In [None]:
# Import dependencies
import os
import json
import requests
from typing import Optional, List, Dict, Any
from pathlib import Path
import datetime

from azure.identity import DefaultAzureCredential
from promptflow.evals.synthetic import AdversarialSimulator, AdversarialScenario
from promptflow.evals.evaluators import (ContentSafetyEvaluator)
from promptflow.evals.evaluate import evaluate

In [None]:
# Read the Azure AI Studio project coordinates from the environment
# (each value is None when its variable is not set)
SUBSCRIPTION_ID = os.getenv('SUBSCRIPTION_ID')
RESOURCE_GROUP = os.getenv('RESOURCE_GROUP')
AI_PROJECT_NAME = os.getenv('AI_PROJECT_NAME')

# Assemble the project descriptor, credential included.
# Any error raised while building it is printed rather than propagated.
try:
    azure_ai_project = {
        "subscription_id": SUBSCRIPTION_ID,
        "resource_group_name": RESOURCE_GROUP,
        "project_name": AI_PROJECT_NAME,
        "credential": DefaultAzureCredential(),
    }
except Exception as err:
    print(f"Error: {err}")

In [None]:
# Echo the loaded settings; an unset variable is displayed as "None"
print(
    "\n"
    f"SUBSCRIPTION_ID: {SUBSCRIPTION_ID}\n"
    f"RESOURCE_GROUP: {RESOURCE_GROUP}\n"
    f"AI_PROJECT_NAME: {AI_PROJECT_NAME}\n"
)

In [None]:
# Initialize simulator
# The project descriptor (subscription, resource group, project name,
# credential) is handed to the adversarial simulator. run_simulation() later
# awaits this object and writes its output to a file.
simulator = AdversarialSimulator(azure_ai_project=azure_ai_project)

## Simulation Functions

**Note:** This code block should be updated to call your API endpoint and to handle its response structure.

In [None]:
# Function to call API endpoint
def call_endpoint(query: str) -> dict:
    """Return the reply for ``query`` as a dict with "answer" and "context" keys.

    Currently a stub: ``query`` is ignored and a canned reply is returned.
    """
    # Custom API endpoint -- template to adapt in place of the stub below
    # api_path = "YOUR_API_ENDPOINT"
    # json_payload = {}
    # response = requests.post(api_path, json=json_payload)
    # response_json = response.json()
    # answer = response_json["text"]
    # json_response = {
    #   "answer": answer,
    #   "context": ""
    # }
    # return json_response

    # Placeholder API response from endpoint
    placeholder_response = {"answer": "This is a test response.", "context": ""}
    return placeholder_response

In [None]:
# Test endpoint function
# Quick manual check: the call's return value is the cell's final expression,
# so a notebook shows the returned dict as the cell output.
call_endpoint("Hello, what can you help with?")

In [None]:
# Function for simulation callback
async def simulation_callback(
    messages: List[Dict],
    stream: bool = False,
    session_state: Any = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    """Answer the newest message via ``call_endpoint`` and append the reply.

    Despite the ``List[Dict]`` annotation, ``messages`` is indexed as a mapping
    whose "messages" entry holds the conversation list. The reply is appended
    to that list in place.

    Returns:
        A dict with the updated "messages" list, the ``stream`` and
        ``session_state`` arguments passed through unchanged, and "context"
        set to None (the incoming ``context`` argument is discarded).
    """
    conversation = messages["messages"]
    user_query = conversation[-1]["content"]
    api_reply = call_endpoint(user_query)

    # Format response to OpenAI chat protocol format
    assistant_turn = {
        "content": api_reply["answer"],
        "role": "assistant",
        "context": {"citations": api_reply["context"]},
    }
    conversation.append(assistant_turn)

    return {
        "messages": conversation,
        "stream": stream,
        "session_state": session_state,
        "context": None,
    }

In [None]:
# Function to run simulation with callback
async def run_simulation(
    output_path: str,
    max_simulation_results: int = 10,
    jailbreak: bool = False,
):
    """Run the adversarial QA simulation and save its output as JSON Lines.

    Args:
        output_path: File to write; missing parent directories are created.
        max_simulation_results: Forwarded to the simulator's
            ``max_simulation_results`` option.
        jailbreak: Forwarded to the simulator's ``jailbreak`` option.

    Returns:
        True when the results were written, False when any step failed
        (the error is printed rather than raised).
    """
    try:
        print("Running simulation...")
        sim_output = await simulator(
            target=simulation_callback,
            scenario=AdversarialScenario.ADVERSARIAL_QA,
            max_conversation_turns=1,
            max_simulation_results=max_simulation_results,
            jailbreak=jailbreak,
        )
        # Path.open is an instance method: build a Path first instead of
        # passing the raw string in the ``self`` position.
        destination = Path(output_path)
        # Avoid losing a finished simulation just because the folder is missing.
        destination.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the file does not depend on the platform default.
        with destination.open("w", encoding="utf-8") as f:
            f.write(sim_output.to_eval_qa_json_lines())
        print(f"Simulation results written to {output_path}")
        return True
    except Exception as e:
        print(f"Error: {e}")
        return False

## Run Simulations
Generated files written to [../data/](../data/)
* `YYYYMMDDHHMMSS_nonjailbreak_sim.jsonl`
* `YYYYMMDDHHMMSS_jailbreak_sim.jsonl`

In [None]:
# Define common params
max_simulation_results = 10
# Timestamp prefix shared by every file generated in this run. Single quotes
# are used inside the f-string because reusing the outer quote character is a
# SyntaxError before Python 3.12.
file_prefix = f"../data/{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}"

In [None]:
# Non-Jailbreak Simulation
file_nonjailbreak_sim = f"{file_prefix}_nonjailbreak_sim.jsonl"
sim_results = await run_simulation(
  output_path=file_nonjailbreak_sim,
  max_simulation_results=max_simulation_results,
  jailbreak=False # FALSE
)

In [None]:
# Jailbreak Simulation
file_jailbreak_sim = f"{file_prefix}_jailbreak_sim.jsonl"
sim_results = await run_simulation(
  output_path=file_jailbreak_sim,
  max_simulation_results=max_simulation_results,
  jailbreak=True # TRUE
)

## Run Evaluators
Generated files written to [../data/](../data/)
* `YYYYMMDDHHMMSS_nonjailbreak_eval.jsonl`
* `YYYYMMDDHHMMSS_jailbreak_eval.jsonl`

In [None]:
# Credential is acquired within evaluate
# Note: When credential is passed, evaluate() function may return odd pickle error
# The "credential" entry is therefore removed in place (no error if it is
# already absent), so the dict handed to the evaluator holds only the
# remaining entries.
azure_ai_project.pop("credential", None)

# Initialize Azure Content Safety Evaluator
# This single instance is the "content_safety" evaluator used by run_evaluation().
content_safety_evaluator = ContentSafetyEvaluator(project_scope=azure_ai_project)

In [None]:
# Function to run evaluation
async def run_evaluation(
    input_sim_data: str,
    output_file_name: str
):
    """Evaluate a simulation file for content safety and save the results.

    Args:
        input_sim_data: Path of the simulation file passed to ``evaluate`` as
            its ``data`` argument.
        output_file_name: Label inserted into the output file name, giving
            ``{file_prefix}_{output_file_name}_eval.jsonl`` (``file_prefix`` is
            the notebook-level run prefix).

    Returns:
        The object returned by ``evaluate``. It is returned even when saving
        it to disk fails, so a write error does not discard the evaluation.
    """
    eval_results = evaluate(
        data=input_sim_data,
        evaluators={"content_safety": content_safety_evaluator},
    )

    # Write evaluation results; a failure here is reported but not raised
    file_eval = f"{file_prefix}_{output_file_name}_eval.jsonl"
    try:
        # Serialize before opening so a serialization error cannot leave a
        # truncated, empty file behind.
        json_string = json.dumps(eval_results)
        with Path(file_eval).open("w") as f:
            f.write(json_string)
    except Exception as e:
        print(f"Error: {e}")
    else:
        print(f"Wrote eval file to {file_eval}")
    return eval_results

In [None]:
# Non-Jailbreak Evaluation
eval_results_nonjailbreak = await run_evaluation(
  input_sim_data = file_nonjailbreak_sim,
  output_file_name = "nonjailbreak"
)

In [None]:
# Jailbreak Evaluation
eval_results_jailbreak = await run_evaluation(
  input_sim_data = file_jailbreak_sim,
  output_file_name = "jailbreak"
)

## Evaluation Metrics

In [None]:
# Print each non-jailbreak metric as "<name> = <value>"
# (the label comes first; it was previously printed after the value)
print("Non-Jailbreak Evaluation:\n")
for metric, value in eval_results_nonjailbreak['metrics'].items():
    print(f"{metric} = {value}")

In [None]:
# Print each jailbreak metric as "<name> = <value>"
# (the label comes first; it was previously printed after the value)
print("Jailbreak Evaluation:\n")
for metric, value in eval_results_jailbreak['metrics'].items():
    print(f"{metric} = {value}")

## Data Files
Check the [data directory](../data/) for generated JSONL files.

Note: a traces report is also created on localhost and linked from the `run_evaluation()` output above.

The url will be similar to:<br>`http://127.0.0.1:23335/v1.0/ui/traces/?...`