In [1]:
# Import libraries
import pandas as pd
import random
from datetime import datetime, timedelta

In [3]:

# OWASP WSTG testing categories, mapped from category name to the reference
# code stored alongside it in each generated test case. Insertion order is kept.
# NOTE(review): the codes look like a project-specific scheme (abbreviation plus
# a two-digit ordinal) rather than identifiers published in the OWASP WSTG;
# confirm before presenting them as official references.
owasp_wstg_categories = dict(
    [
        ("Information Gathering", "WSTG-IGV-01"),
        ("Configuration and Deployment Management", "WSTG-CONF-02"),
        ("Identity Management", "WSTG-IDM-03"),
        ("Authentication Testing", "WSTG-ATHN-04"),
        ("Authorization Testing", "WSTG-ATHZ-05"),
        ("Session Management Testing", "WSTG-SESS-06"),
        ("Data Validation Testing", "WSTG-DV-07"),
        ("Error Handling", "WSTG-ERRH-08"),
        ("Cryptography Testing", "WSTG-CRYP-09"),
        ("Business Logic Testing", "WSTG-BUSL-10"),
        ("Client-Side Testing", "WSTG-CLNT-11"),
        ("API Testing", "WSTG-API-12"),
    ]
)


In [4]:
# MITRE ATT&CK technique IDs that a generated test case can be tagged with.
# This is a relevance-based selection, not the full ATT&CK catalogue.
mitre_techniques = [
    # Exploit Public-Facing Application: OWASP injection, XSS and RCE flaws
    "T1190",
    # Valid Accounts: credential-based attacks (brute force, credential stuffing)
    "T1078",
    # External Remote Services: misconfigured VPN/RDP/SSH (OWASP config testing)
    "T1133",
    # Command and Scripting Interpreter: script execution (JS, Python, Bash)
    "T1059",
    # Account Discovery: enumerating user accounts and roles (identity testing)
    "T1087",
    # Brute Force: password strength, rate limiting, credential stuffing
    "T1110",
    # Active Scanning: reconnaissance for open ports, services, vulnerabilities
    "T1595",
    # Supply Chain Compromise: dependencies, third-party software, packages
    "T1195",
    # Exploitation for Client Execution: drive-by downloads, malicious scripts
    "T1203",
    # Phishing: email-based social engineering scenarios
    "T1566",
]

In [5]:
# Compliance frameworks a test case can be mapped to; generate_test_cases()
# picks one of these at random for every row.
compliance_frameworks = [
    "PCI DSS 6.5.1",
    "HIPAA 164.312",
    "ISO 27001",
]

In [6]:
# CVSS v3.1 reference entry for each qualitative severity label.
# Every value has the form "<base score> - <vector string>". The base score is
# the one the CVSS v3.1 base equations give for that exact vector, so the score,
# the vector and the severity label all agree with each other.
cvss_scores = {
    "Critical": "9.8 - CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
    "High": "8.8 - CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H",
    "Medium": "5.0 - CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:L/I:L/A:L",
    "Low": "2.9 - CVSS:3.1/AV:L/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N",
}

In [7]:
# Generate synthetic test cases with OWASP WSTG details
def generate_test_cases(num_cases: int = 100, seed=None) -> pd.DataFrame:
    """Build a DataFrame of synthetic application-security test cases.

    Each row pairs a sequential test ID with randomly drawn project, tester and
    user IDs, an OWASP WSTG category and its reference code, a MITRE ATT&CK
    technique ID, a compliance framework, a prerequisite, a severity label with
    its CVSS entry, remediation/workflow status, and created/modified dates.

    Args:
        num_cases: Number of rows to generate. Values <= 0 give an empty frame.
        seed: Optional seed for reproducible output. When given, all random
            fields are drawn from a private ``random.Random(seed)``, leaving the
            global ``random`` state untouched. When ``None`` (the default) the
            module-level ``random`` functions are used.

    Returns:
        A pandas DataFrame with one row per test case.

    Note:
        'Created On' and 'Modified On' are derived from the current clock, so
        the dates themselves move with the day the function is run even when a
        seed is supplied (the random day offset is still reproducible).
    """
    # Without a seed, keep using the module-level functions so callers that
    # seed the global generator themselves get exactly the same draws as before.
    rng = random if seed is None else random.Random(seed)

    # Loop-invariant lookups: build the candidate lists once, not once per row.
    owasp_names = list(owasp_wstg_categories)
    severity_labels = list(cvss_scores)

    # Take one timestamp per call so every row shares the same "today", even if
    # generation happens to straddle midnight.
    now = datetime.now()
    modified_on = now.strftime('%Y-%m-%d')

    test_cases = []
    for i in range(1, num_cases + 1):
        # Sequential ID, zero-padded to at least three digits: "APP-SEC-001", ...
        test_id = f'APP-SEC-{i:03}'

        # The order of the random draws below is significant: it determines
        # which values a given seed produces.
        project_id = f'PROJ-{rng.randint(1, 10):03}'
        # NOTE(review): tester IDs share the 'USER-' prefix with user IDs, so
        # the two columns can hold identical strings; confirm this is intended.
        tester_id = f'USER-{rng.randint(1, 5):03}'   # TODO: confirm range 1-5
        user_id = f'USER-{rng.randint(1, 20):03}'    # TODO: confirm range 1-20
        days_allocated = rng.randint(1, 30)          # 1 to 30 days inclusive
        owasp_category = rng.choice(owasp_names)
        owasp_reference = owasp_wstg_categories[owasp_category]
        mitre_attack_id = rng.choice(mitre_techniques)
        compliance = rng.choice(compliance_frameworks)
        test_prerequisites = rng.choice(
            ["Valid user credentials", "VPN access", "Admin privileges required"]
        )
        severity_label = rng.choice(severity_labels)
        # The mapped value is the whole entry stored in cvss_scores for the label.
        cvss_entry = cvss_scores[severity_label]
        remediation_status = rng.choice(["Open", "In Progress", "Verified Fixed"])
        status = rng.choice(['Pending', 'In Progress', 'Completed'])
        # Created between 1 and 30 days before the shared timestamp.
        created_on = (now - timedelta(days=rng.randint(1, 30))).strftime('%Y-%m-%d')

        # Column layout follows the comprehensive schema (updated 28/02).
        test_cases.append({
            'Test ID': test_id,
            'Project ID': project_id,
            'Tester ID': tester_id,
            'User ID': user_id,
            'Days Allocated': days_allocated,
            'OWASP Category': owasp_category,
            'OWASP Reference': owasp_reference,
            'MITRE ATT&CK Technique ID': mitre_attack_id,
            'Compliance Frameworks': compliance,
            'Test Prerequisites': test_prerequisites,
            'Finding Severity': severity_label,
            'CVSS Score': cvss_entry,
            'Remediation Status': remediation_status,
            'Status': status,
            'Created On': created_on,
            'Modified On': modified_on,
        })
    return pd.DataFrame(test_cases)


In [9]:
# Build the synthetic dataset: 100 test cases
test_case_data = generate_test_cases(num_cases=100)

# Preview the first five rows as plain text on stdout
print(test_case_data.head(n=5))

       Test ID Project ID Tester ID   User ID  Days Allocated  \
0  APP-SEC-001   PROJ-010  USER-005  USER-003              13   
1  APP-SEC-002   PROJ-005  USER-005  USER-005              18   
2  APP-SEC-003   PROJ-007  USER-004  USER-015              13   
3  APP-SEC-004   PROJ-001  USER-002  USER-020              10   
4  APP-SEC-005   PROJ-005  USER-001  USER-004              23   

                            OWASP Category OWASP Reference  \
0                           Error Handling    WSTG-ERRH-08   
1  Configuration and Deployment Management    WSTG-CONF-02   
2                  Data Validation Testing      WSTG-DV-07   
3                   Authentication Testing    WSTG-ATHN-04   
4               Session Management Testing    WSTG-SESS-06   

  MITRE ATT&CK Technique ID Compliance Frameworks         Test Prerequisites  \
0                     T1087         HIPAA 164.312  Admin privileges required   
1                     T1059         PCI DSS 6.5.1                 VPN access

In [None]:
# Planned: a matching function, match_logs_to_tests(), to link this test-case dataset to log data.

# match_logs_to_tests() will process the log data and match each log entry to its corresponding test case, based on keywords in the log event and on the test case categories.
# Matching logic: the function will look for matches between log events (e.g. "login", "SQL injection") and test case categories (e.g. "Authentication Testing") or MITRE ATT&CK techniques (e.g. "T1190").

In [10]:
# Persist the generated test cases as CSV (row index omitted), then confirm on stdout
test_case_data.to_csv(path_or_buf="updated_security_test_cases.csv", index=False)
print("Updated security test cases saved as 'updated_security_test_cases.csv'")

Updated security test cases saved as 'updated_security_test_cases.csv'
