# Cyber Relevance Pass

The goal of this tool is to automatically assess whether government ministries in different countries are direct stakeholders in (i.e., responsible for or involved in) their country's cybersecurity ecosystem.



In [None]:
import sys
import os
sys.path.append(os.path.abspath(".."))
import pydantic

from autora import sprayer
import pandas as pd

# Read the ministry spreadsheet; its COUNTRY column supplies the countries to query.
data = pd.read_excel("../data/ministries_of_energy_done_with_all_countries.xlsx")

# Word sets for the energy domain: one domain plus every entry of the COUNTRY column.
# NOTE(review): presumably passed to sprayer.spray for the full run — confirm.
domain = "energy"
countries = data["COUNTRY"].tolist()
word_sets = dict(domain=[domain], country=countries)

In [2]:
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum

# Closed set of ratings used for the `relevance_level` field of
# CyberStakeholderAssessment.
# Mixing in `str` makes every member a real string that compares equal to its
# value (e.g. CyberRelevanceLevel.HIGH == "high").
# NOTE(review): the class docstring may be surfaced in the schema generated for
# the response model — confirm before rewording it.
class CyberRelevanceLevel(str, Enum):
    """Level of cybersecurity involvement"""
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    # Presumably "no involvement at all" (TODO confirm); this level has no
    # counterpart in ConfidenceLevel.
    NONE = "none"

# Three-step scale used for the `confidence` field of
# CyberStakeholderAssessment.
# Mixing in `str` makes every member a real string that compares equal to its
# value (e.g. ConfidenceLevel.LOW == "low").
# NOTE(review): the class docstring may be surfaced in the schema generated for
# the response model — confirm before rewording it.
class ConfidenceLevel(str, Enum):
    """Confidence in the assessment"""
    HIGH = "high"
    MEDIUM = "medium" 
    LOW = "low"

# Response schema for a single assessment; this notebook passes it to
# sprayer.spray as `response_model`.
# NOTE(review): the class docstring and the Field descriptions below are
# presumably included in the schema shown to the model, so rewording them may
# change the answers — confirm before editing.
class CyberStakeholderAssessment(BaseModel):
    """Assessment of whether a ministry/department is a cybersecurity stakeholder"""    
    # Required: no default is given, so validation fails when it is missing.
    relevance_level: CyberRelevanceLevel = Field(
        description="Level of cybersecurity involvement (high/medium/low/none)"
    )
    # Required: how sure the assessment is, on the ConfidenceLevel scale.
    confidence: ConfidenceLevel = Field(
        description="Confidence level of this assessment"
    )
    # Optional free text; defaults to None when no explanation is returned.
    explanation: Optional[str] = Field(
        default=None,
        description="Explanation for the assessment with citation reference after each claim."
    )

# Research-question template. The {domain} and {country} placeholders match the
# keys of the word sets and are filled in once per word combination.
prompt = (
    "Is the department/ministry of {domain} in {country} "
    "a direct stakeholder (i.e., responsible for or involved in) "
    "the country's cybersecurity?"
)

In [None]:
# Test on a single example
test = await sprayer.spray(
    word_sets={
        "domain": ["health","Telecommunications"],
        "country": ["Afghanistan"],

    },
    response_model=CyberStakeholderAssessment,
    research_questions=[prompt],
    max_queries=1
)

Creating table with 2 word combinations and 1 research questions...
Total API calls to make: 2. Limiting to max_queries={max_queries}.

Processing combination 1/2: {'domain': 'health', 'country': 'Afghanistan'}
  Query 1/2: Is the department/ministry of health in Afghanistan a direct stakeholder (i.e., ...
    ✓ Got structured response (1116 chars, 0 retries)

Processing combination 2/2: {'domain': 'Telecommunications', 'country': 'Afghanistan'}
  Query 2/2: Is the department/ministry of Telecommunications in Afghanistan a direct stakeho...
    ✓ Got structured response (819 chars, 0 retries)


In [9]:
# Citations recorded for the first result row (index label 0).
test["enriched_citations"][0]

[{'url': 'https://mcit.gov.af/sites/default/files/2020-08/National%20Cybersecurity%20Strategy%20of%20Afghanistan%20(November2014).pdf',
  'title': '[PDF] National Cyber Security Strategy of Afghanistan (NCSA)',
  'snippet': 'The NCSA aims to establish a safe, secure, and resilient cyber space, protecting data and IT infrastructure, and enhancing capacities to prevent cyber threats.',
  'date': None,
  'last_updated': '2024-09-26',
  'matched': True},
 {'url': 'https://digital-square.squarespace.com/s/MM-brief-Afghanistan.pdf',
  'title': '[PDF] Digital health systems to support pandemic response in Afghanistan',
  'snippet': 'Do not invest in new systems if there are existing systems the government endorses that can effectively approach each of the pandemic use cases. Learn more ...',
  'date': None,
  'last_updated': '2025-09-09',
  'matched': True},
 {'url': 'https://2020.wish.org.qa/app/uploads/2020/09/WISH-2020_Forum-Reports_Cyber-Security-and-Healthcare-Systems_ENG.pdf',
  'title'

In [8]:
# Parsed response of the first result row, as stored in structured_data.
test["structured_data"].iloc[0]

{'relevance_level': <CyberRelevanceLevel.LOW: 'low'>,
 'confidence': <ConfidenceLevel.HIGH: 'high'>,
 'explanation': "The Ministry of Public Health (MoPH) in Afghanistan plays a critical role in managing the country's healthcare systems, including digital health platforms used for pandemic response and healthcare data management, which require cybersecurity measures to protect sensitive health data and systems[2]. However, the direct responsibility and coordination of national cybersecurity efforts fall under the Ministry of Communications and Information Technology (MCIT), specifically the Information Systems Security Directorate (ISSD), which is tasked with coordinating all cybersecurity issues and infrastructure protection for the government[1]. While MoPH is involved in securing its own digital health systems, it is not a primary or direct stakeholder responsible for national cybersecurity strategy or oversight, which are centralized in the MCIT. This indicates a low level of cyber

In [10]:
# Preview the raw results next to the structured_data fields expanded into
# their own columns. (A bare expansion that preceded this line was dropped: it
# was not the cell's last expression, so its result was computed and discarded.)
pd.concat([test, test['structured_data'].apply(pd.Series)], axis=1)

Unnamed: 0,domain,country,question_template,research_question,sonar_response_json,search_results,citations,content,structured_data,parsing_success,parsing_error,retries_used,relevance_level,confidence,explanation
0,health,Afghanistan,Is the department/ministry of {domain} in {cou...,Is the department/ministry of health in Afghan...,"{'id': 'a78f9e7a-8745-4454-af7e-96baca13024e',...",[{'title': '[PDF] National Cyber Security Stra...,[https://mcit.gov.af/sites/default/files/2020-...,"{\n ""relevance_level"": ""low"",\n ""confidence""...","{'relevance_level': 'CyberRelevanceLevel.LOW',...",True,,0,CyberRelevanceLevel.LOW,ConfidenceLevel.HIGH,The Ministry of Communications and Information...
1,Telecommunications,Afghanistan,Is the department/ministry of {domain} in {cou...,Is the department/ministry of Telecommunicatio...,"{'id': '7aac76fe-d8b1-46bb-9691-eca4d04c5f8b',...",[{'title': '[PDF] National Cyber Security Stra...,[https://mcit.gov.af/sites/default/files/2020-...,"{""relevance_level"":""high"",""confidence"":""high"",...",{'relevance_level': 'CyberRelevanceLevel.HIGH'...,True,,0,CyberRelevanceLevel.HIGH,ConfidenceLevel.HIGH,The Ministry of Communications and Information...


In [10]:
# Keep domain, country, research_question and enriched_citations,
# and expand structured_data into one column per field.
from fileinput import filename


def process_results(spray_output:pd.DataFrame) -> pd.DataFrame:
    """Flatten raw spray output into one tidy row per query.

    Keeps the identifying columns (``domain``, ``country``,
    ``research_question``, ``enriched_citations``) and, when a
    ``structured_data`` column is present, appends one column per key found in
    its dict entries.

    Args:
        spray_output: Result frame to flatten. It must contain the four kept
            columns; ``structured_data`` is optional.

    Returns:
        A new DataFrame with the same index as ``spray_output``. Rows whose
        ``structured_data`` entry is not a dict (e.g. ``None`` or NaN) get
        missing values in the expanded columns. Enum members found in the
        dicts are replaced by their plain ``.value``.

    Raises:
        KeyError: If any of the four kept columns is missing.
    """
    from enum import Enum

    kept = spray_output[['domain', 'country', 'research_question', 'enriched_citations']]
    if 'structured_data' not in spray_output.columns:
        return kept

    # Build one plain dict per row. Non-dict entries become an empty dict so
    # they cannot inject stray columns; Enum members are unwrapped so the
    # flattened frame holds plain values.
    records = []
    for item in spray_output['structured_data']:
        if isinstance(item, dict):
            records.append({
                key: value.value if isinstance(value, Enum) else value
                for key, value in item.items()
            })
        else:
            records.append({})

    # Records are in row order; labelling them with the input index keeps the
    # column-wise concat aligned row for row.
    expanded = pd.DataFrame(records, index=spray_output.index)
    return pd.concat([kept, expanded], axis=1)


def save_results(spray_output:pd.DataFrame, base_filename:str="cyber_relevance_results"):
    """Write the results to disk in raw and flattened form.

    Two files are produced next to each other, both named after
    ``base_filename``:

    * ``<base_filename>.json`` — the unmodified frame, one JSON record per line.
    * ``<base_filename>.csv`` — the output of ``process_results``, without the
      index column.

    Args:
        spray_output: Result frame to persist.
        base_filename: Path prefix shared by both output files.
    """
    # Raw dump first, so it is on disk even if the flattening step fails.
    raw_path = "{}.json".format(base_filename)
    spray_output.to_json(raw_path, orient="records", lines=True)
    print("Raw results saved to {}".format(raw_path))

    # Flattened view for spreadsheet use.
    flat_path = "{}.csv".format(base_filename)
    process_results(spray_output).to_csv(flat_path, index=False)
    print("Processed results saved to {}".format(flat_path))

# Persist the smoke-test results (raw JSON lines plus flattened CSV).
save_results(test, "cyber_relevance_results")

Raw results saved to cyber_relevance_results.json
Processed results saved to cyber_relevance_results.csv


In [None]:
# Inspect the structured_data column of the smoke-test results.
test["structured_data"]

0    {'relevance_level': 'CyberRelevanceLevel.LOW',...
1    {'relevance_level': 'CyberRelevanceLevel.HIGH'...
Name: structured_data, dtype: object