# **Step 1:** Data Generation Code For Planned Test Cases Dataset

In [None]:
# Import libraries
import pandas as pd
import random
from datetime import datetime, timedelta

In [None]:
# OWASP WSTG category lookup: maps a CamelCase category name to the
# identifier string used for that category in this dataset.
# NOTE(review): several identifiers differ from the category prefixes published
# in the OWASP Web Security Testing Guide (e.g. WSTG-INFO, WSTG-IDNT,
# WSTG-INPV, WSTG-APIT), and each carries a numeric suffix that mirrors its
# position in this dict — confirm this is an intentional project-specific scheme.
# NOTE(review): not referenced by any other code in the visible part of this file.
OWASP_WSTG_CATEGORIES = {
    "InformationGathering": "WSTG-IGV-01",
    "ConfigurationDeploymentManagement": "WSTG-CONF-02",
    "IdentityManagement": "WSTG-IDM-03",
    "AuthenticationTesting": "WSTG-ATHN-04",
    "AuthorizationTesting": "WSTG-ATHZ-05",
    "SessionManagementTesting": "WSTG-SESS-06",
    "DataValidationTesting": "WSTG-DV-07",
    "ErrorHandling": "WSTG-ERRH-08",
    "CryptographyTesting": "WSTG-CRYP-09",
    "BusinessLogicTesting": "WSTG-BUSL-10",
    "ClientSideTesting": "WSTG-CLNT-11",
    "APITesting": "WSTG-API-12"
}

In [None]:
# MITRE ATT&CK technique lookup: technique ID -> technique name.
# Only a subset selected for relevance is listed, not the full ATT&CK matrix.
# The trailing comment on each entry records why the technique was selected.

MITRE_TECHNIQUES = {
    "T1190": "Exploit Public-Facing Application",  # Targets OWASP Injection, XSS, and RCE vulnerabilities
    "T1078": "Valid Accounts",  # Covers credential-based attacks like brute force and credential stuffing
    "T1133": "External Remote Services",  # Exploits misconfigured VPN, RDP, or SSH, aligning with OWASP Config Testing
    "T1059": "Command and Scripting Interpreter",  # Tests script execution vulnerabilities (JS, Python, Bash)
    "T1087": "Account Discovery",  # Focuses on enumerating user accounts and roles, relevant for Identity Testing
    "T1110": "Brute Force",  # Checks password strength, rate-limiting, and credential stuffing attacks
    "T1595": "Active Scanning",  # Simulates attacker reconnaissance to identify open ports, services, and vulnerabilities
    "T1195": "Supply Chain Compromise",  # Targets dependencies, 3rd-party software, and package vulnerabilities
    "T1203": "Exploitation for Client Execution",  # Examines drive-by downloads, malicious script execution
    "T1566": "Phishing"  # Tests social engineering scenarios related to email-based attacks
}

In [None]:
# Compliance frameworks used in this dataset (a selection chosen for
# relevance to Vantage Point Security).
COMPLIANCE_FRAMEWORKS = [
    # Recognized industry standard for security testing.
    "CREST Penetration Testing",
    # A key framework for security and risk management.
    "NIST 800-53",
    # Critical for financial institutions in Singapore.
    "MAS TRM (Technology Risk Management)",
]

In [None]:
# CVSS reference strings keyed by severity label. Each value has the form
# "<base score> - <CVSS v3.1 vector string>".
# NOTE(review): the scores for "High", "Medium" and "Low" do not appear to match
# what the CVSS v3.1 base-score formula yields for the vectors next to them
# (roughly 8.8, 5.0 and 2.9 respectively) — verify with the FIRST calculator and
# correct either the score or the vector.
# NOTE(review): not referenced by any other code in the visible part of this file.
CVSS_SCORES = {
    "Critical": "9.8 - CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
    "High":    "7.5 - CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H",
    "Medium":  "5.4 - CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:L/I:L/A:L",
    "Low":     "3.2 - CVSS:3.1/AV:L/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N"
}

# Reading a vector string, using the "Critical" entry as the example:
#   CVSS:3.1 = CVSS version, AV:N = Attack Vector: Network,
#   AC:L = Attack Complexity: Low, PR:N = Privileges Required: None,
#   UI:N = User Interaction: None, S:U = Scope: Unchanged,
#   C:H / I:H / A:H = Confidentiality / Integrity / Availability Impact: High

In [None]:
# Candidate values that the generator picks from at random for each row.

# Testing scope of a test case
TESTING_SCOPES = [
    "WEB",
    "APP",
]

# Test case types, based on common OWASP methodologies and security testing categories
TEST_CASES = [
    "Authentication",
    "Authorization",
    "Input Validation",
    "SQL Injection",
    "XSS",
]

# Components a finding can be reported against
AFFECTED_COMPONENTS = [
    "Login form",
    "User profile page",
    "Payment gateway",
    "Database",
    "Session management",
]

# Severity labels, listed from least to most severe
SEVERITY_RATINGS = [
    "Low",
    "Medium",
    "High",
    "Critical",
]

In [None]:
# Build the planned security test-case dataset
def generate_refined_test_cases(num_random_cases=800, num_not_completed_cases=100,
                                project_ids=None, user_ids=None, seed=None):
    """Build a DataFrame of randomly generated security test cases.

    Two consecutively numbered batches are generated: ``num_random_cases``
    rows first, then ``num_not_completed_cases`` rows. Both batches share the
    same columns and are filled by the same sequence of random draws.

    Args:
        num_random_cases: Size of the first batch; numbering starts at 1.
        num_not_completed_cases: Size of the second batch; numbering
            continues where the first batch stopped.
        project_ids: Candidate project IDs. When falsy (None or empty),
            "PRJ-001" through "PRJ-099" are used.
        user_ids: Candidate user IDs. When falsy (None or empty),
            "USER-001" through "USER-099" are used.
        seed: When not None, seeds the module-level ``random`` generator
            before any value is drawn.

    Returns:
        A pandas DataFrame with one row per test case and the columns
        'Project ID', 'Project Name', 'User ID', 'Testing Scope',
        'Test Case ID', 'Test Case', 'Findings (Affected Components)',
        'Severity Ratings' and 'Deadline'.
    """
    if seed is not None:
        random.seed(seed)

    # Fall back to generated placeholder IDs when no candidates are supplied
    project_pool = project_ids or [f"PRJ-{n:03}" for n in range(1, 100)]
    user_pool = user_ids or [f"USER-{n:03}" for n in range(1, 100)]

    def random_deadline():
        # Pick a day between 01-Oct-2024 and 31-Oct-2024, both inclusive
        date_format = "%d-%b-%Y"
        first_day = datetime.strptime("01-Oct-2024", date_format)
        last_day = datetime.strptime("31-Oct-2024", date_format)
        day_offset = random.randint(0, (last_day - first_day).days)
        return (first_day + timedelta(days=day_offset)).strftime(date_format)

    def build_row(case_number):
        # Dict values are evaluated top to bottom, so the random draws happen
        # in this order: project, user, scope, test case, component,
        # severity, deadline.
        return {
            'Project ID': random.choice(project_pool),
            'Project Name': f"Test Case {case_number}",
            'User ID': random.choice(user_pool),
            'Testing Scope': random.choice(TESTING_SCOPES),
            'Test Case ID': f"TC-{case_number:03}",
            'Test Case': random.choice(TEST_CASES),
            'Findings (Affected Components)': random.choice(AFFECTED_COMPONENTS),
            'Severity Ratings': random.choice(SEVERITY_RATINGS),
            'Deadline': random_deadline(),
        }

    # The second batch continues the numbering of the first one
    first_batch = range(1, num_random_cases + 1)
    second_batch = range(num_random_cases + 1,
                         num_random_cases + num_not_completed_cases + 1)

    rows = []
    for batch in (first_batch, second_batch):
        rows.extend(build_row(case_number) for case_number in batch)

    return pd.DataFrame(rows)

In [None]:
if __name__ == "__main__":
    # Reuse the project IDs from the CRM export when it is available;
    # otherwise let the generator fall back to its placeholder IDs.
    try:
        crm_projects = pd.read_csv("projects_data.csv")
    except FileNotFoundError:
        print("No CRM project file found; using dummy ProjectIDs.")
        existing_project_ids = None
    else:
        existing_project_ids = crm_projects["ProjectID"].unique().tolist()

    # Fixed seed so that repeated runs produce the same dataset
    test_cases_df = generate_refined_test_cases(
        num_random_cases=800,
        num_not_completed_cases=100,
        project_ids=existing_project_ids,
        user_ids=None,
        seed=42,
    )

    # Show the first rows as a quick sanity check
    print("\n--- Sample Refined Test Cases ---")
    print(test_cases_df.head())

    # Persist the dataset without the DataFrame index column
    test_cases_df.to_csv("refined_security_test_cases.csv", index=False)
    print("\nSaved 'refined_security_test_cases.csv'.")

## Adding Realistic Project Names That Match the CRM Data's Naming Convention

In [None]:
import pandas as pd
import random

In [None]:
# Path of the test-case CSV: it is read here and overwritten further down
# once the project names have been replaced.
csv_path = "refined_security_test_cases.csv"
planned_df = pd.read_csv(csv_path)

# Pool of project names, following the naming convention of the CRM data
# generation step; every entry ends in "Contract Project".
# NOTE(review): "ERP Data Migration Project Contract Project" contains
# "Project" twice — confirm this matches the CRM data and is not a typo.
project_names = [
    "Denial of Service (DoS) Attack Contract Project",
    "ERP Implementation - TechCorp Contract Project",
    "Post-Implementation ERP Support Contract Project",
    "ERP Reporting & Analytics Implementation Contract Project",
    "Software as a Service (SaaS) Contract Project",
    "Network Infrastructure Contract Project",
    "Internal Systems Contract Project",
    "Wireless Network Contract Project",
    "Ransomware Prevention Contract Project",
    "Remote Work Security Contract Project",
    "Insider Threat Awareness Contract Project",
    "Application Modernization Contract Project",
    "Firewall Implementation Contract Project",
    "Wireless Network Security Contract Project",
    "Network Segmentation Contract Project",
    "Cloud Environment Contract Project",
    "Cloud-Based ERP Upgrade Contract Project",
    "Virtual Private Network (VPN) Deployment Contract Project",
    "Customer Behavior Analysis Contract Project",
    "Sales Performance Optimization Contract Project",
    "Supply Chain Optimization Contract Project",
    "Financial Forecasting Contract Project",
    "Data Breach Contract Project",
    "Cloud Adoption Strategy Contract Project",
    "Mobile First Strategy Contract Project",
    "Web Application Contract Project",
    "File Encryption Contract Project",
    "Intrusion Detection/Prevention System (IDS/IPS) Contract Project",
    "Distributed Denial of Service (DDoS) Protection Contract Project",
    "Network Access Control (NAC) Contract Project",
    "API Security Contract Project",
    "Data Security and Privacy Contract Project",
    "Email Encryption Contract Project",
    "Mobile Device Encryption Contract Project",
    "Network Traffic Encryption (TLS/SSL) Contract Project",
    "External Network Contract Project",
    "ERP Data Migration Project Contract Project",
    "ERP Security Enhancement Contract Project",
    "Compliance (HIPAA) Contract Project",
    "E-commerce Platform Development Contract Project",
    "Cloud Storage Encryption Contract Project",
    "Data-Driven Decision Making Contract Project",
    "Infrastructure as a Service (IaaS) Contract Project",
    "Platform as a Service (PaaS) Contract Project",
    "Data Migration & Integration Contract Project",
    "Security and Compliance Contract Project",
    "Cost Optimization Contract Project",
    "Customer Experience Enhancement Contract Project",
    "Business Process Automation Contract Project",
    "Mobile Application Contract Project",
    "Remote Work Enablement Contract Project",
    "ERP Customization & Integration Contract Project",
    "Password Security Contract Project",
    "Marketing Campaign Effectiveness Contract Project",
    "Operational Efficiency Improvement Contract Project",
    "Risk Management Analytics Contract Project"
]

# Give every distinct Project ID one project name, chosen at random
unique_project_ids = planned_df["Project ID"].unique()
id_count = len(unique_project_ids)

# random.sample cannot draw more items than its population holds, so repeat
# the name list until the pool is larger than the number of IDs
pool_repeats = id_count // len(project_names) + 1
name_pool = project_names * pool_repeats

random.seed(42)  # fixed seed keeps the assignment the same on every run
assigned_names = random.sample(name_pool, id_count)
project_name_map = dict(zip(unique_project_ids, assigned_names))

# Replace the per-row project name with the name assigned to the row's Project ID
planned_df["Project Name"] = planned_df["Project ID"].map(project_name_map)

# Write the result back over the file that was read
planned_df.to_csv(csv_path, index=False)