In [10]:
# Import libraries
import pandas as pd
import random
from datetime import datetime, timedelta

In [11]:

# Define OWASP WSTG Categories (updated with specified categorisation)
OWASP_WSTG_CATEGORIES = {
    "InformationGathering": "WSTG-IGV-01",
    "ConfigurationDeploymentManagement": "WSTG-CONF-02",
    "IdentityManagement": "WSTG-IDM-03",
    "AuthenticationTesting": "WSTG-ATHN-04",
    "AuthorizationTesting": "WSTG-ATHZ-05",
    "SessionManagementTesting": "WSTG-SESS-06",
    "DataValidationTesting": "WSTG-DV-07",
    "ErrorHandling": "WSTG-ERRH-08",
    "CryptographyTesting": "WSTG-CRYP-09",
    "BusinessLogicTesting": "WSTG-BUSL-10",
    "ClientSideTesting": "WSTG-CLNT-11",
    "APITesting": "WSTG-API-12"
}


In [12]:
# Define MITRE ATT&CK Technique IDs as a dictionary (Selected based on relevance - NOT all are here)

MITRE_TECHNIQUES = {
    "T1190": "Exploit Public-Facing Application",  # Exploit Public-Facing Application - Targets OWASP Injection, XSS, and RCE vulnerabilities
    "T1078": "Valid Accounts", # Valid Accounts - Covers credential-based attacks like brute force and credential stuffing
    "T1133": "External Remote Services", # External Remote Services - Exploits misconfigured VPN, RDP, or SSH, aligning with OWASP Config Testing
    "T1059": "Command and Scripting Interpreter", # Command and Scripting Interpreter - Tests script execution vulnerabilities (JS, Python, Bash)
    "T1087": "Account Discovery", # Account Discovery - Focuses on enumerating user accounts and roles, relevant for Identity Testing
    "T1110": "Brute Force",  # Brute Force - Checks password strength, rate-limiting, and credential stuffing attacks
    "T1595": "Active Scanning", # Active Scanning - Simulates attacker reconnaissance to identify open ports, services, and vulnerabilities
    "T1195": "Supply Chain Compromise", # Supply Chain Compromise - Targets dependencies, 3rd-party software, and package vulnerabilities
    "T1203": "Exploitation for Client Execution",  # Exploitation for Client Execution - Examines drive-by downloads, malicious script execution
    "T1566": "Phishing" # Phishing - Tests social engineering scenarios related to email-based attacks
}

In [13]:
# Define Compliance Frameworks (Random selection for Vantage Point Security Relevance)
COMPLIANCE_FRAMEWORKS = ["CREST Penetration Testing", "NIST 800-53", "MAS TRM (Technology Risk Management)"]

# CREST Penetration Testing - Recognized industry standard for security testing.
# NIST 800-53 - A key framework for security and risk management.git pull origin testing
# MAS TRM (Technology Risk Management) - Critical for financial institutions in Singapore.



In [14]:
# Define CVSS score categories - Industry standards
CVSS_SCORES = {
    "Critical": "9.8 - CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
    "High":    "7.5 - CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H",
    "Medium":  "5.4 - CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:L/I:L/A:L",
    "Low":     "3.2 - CVSS:3.1/AV:L/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N"
}

# CVSS base score - version of the CVSS / Attack Vector: Network / Attack Complexity: Low / Privileges Required: None / User Interaction: None / Scope: Unchanged / Confidentiality Impact: High / Integrity Impact: High / Availability Impact: High

In [15]:

# Define possible values for testing scope, status, test case, and severity
TESTING_SCOPES = ["WEB", "APP"]
TESTING_SCOPE_STATUSES = ["Not started", "In progress", "Completed"]
TEST_CASES = ["Authentication", "Authorization", "Input Validation", "SQL Injection", "XSS"] #based on common OWASP methodologies and security testing categories. 
TEST_CASE_STATUSES = ["Not Started", "Completed"]
AFFECTED_COMPONENTS = ["Login form", "User profile page", "Payment gateway", "Database", "Session management"]
SEVERITY_RATINGS = ["Low", "Medium", "High", "Critical"]

In [16]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Define possible values for testing scope, status, test case, and severity
TESTING_SCOPES = ["WEB", "APP"]
TESTING_SCOPE_STATUSES = ["Not started", "In progress", "Completed"]
TEST_CASES = ["Login form", "User profile page", "Payment gateway", "Database", "Session management"]
TEST_CASE_STATUSES = ["Not Started", "Completed"]
AFFECTED_COMPONENTS = ["Login form", "User profile page", "Payment gateway", "Database", "Session management"]
SEVERITY_RATINGS = ["Low", "Medium", "High", "Critical"]

# Function to generate refined test cases
def generate_refined_test_cases(num_random_cases=800, num_not_completed_cases=100, project_ids=None, user_ids=None, seed=None):
    if seed is not None:
        random.seed(seed)

    if not project_ids:
        project_ids = [f"PRJ-{i:03}" for i in range(1, 100)]

    if not user_ids:
        user_ids = [f"USER-{i:03}" for i in range(1, 100)]

    test_cases = []

    # Generate 800 randomly distributed test cases
    for i in range(1, num_random_cases + 1):
        project_id = random.choice(project_ids)
        project_name = f"Test Case {i}"
        user_id = random.choice(user_ids)
        testing_scope = random.choice(TESTING_SCOPES)
        test_case_status = random.choice(TEST_CASE_STATUSES)
        # Ensure test case logic: if Not Started -> testing scope must not be Completed
        if test_case_status == "Not Started":
            testing_scope_status = random.choice(["Not started", "In progress"])
        else:
            testing_scope_status = random.choice(TESTING_SCOPE_STATUSES)

        test_case_id = f"TC-{i:03}"
        test_case = random.choice(TEST_CASES)
        affected_components = random.choice(AFFECTED_COMPONENTS)
        severity_rating = random.choice(SEVERITY_RATINGS)

        test_case_entry = {
            'Project ID': project_id,
            'Project Name': project_name,
            'User ID': user_id,
            'Testing Scope': testing_scope,
            'Testing Scope Status': testing_scope_status,
            'Test Case ID': test_case_id,
            'Test Case': test_case,
            'Test Case Status': test_case_status,
            'Findings (Affected Components)': affected_components,
            'Severity Ratings': severity_rating
        }
        test_cases.append(test_case_entry)

    # Generate 100 additional test cases with "Not Started" status
    for i in range(num_random_cases + 1, num_random_cases + num_not_completed_cases + 1):
        project_id = random.choice(project_ids)
        project_name = f"Test Case {i}"
        user_id = random.choice(user_ids)
        testing_scope = random.choice(TESTING_SCOPES)
        testing_scope_status = random.choice(["Not started", "In progress"])
        test_case_id = f"TC-{i:03}"
        test_case = random.choice(TEST_CASES)
        test_case_status = "Not Started"
        affected_components = random.choice(AFFECTED_COMPONENTS)
        severity_rating = random.choice(SEVERITY_RATINGS)

        test_case_entry = {
            'Project ID': project_id,
            'Project Name': project_name,
            'User ID': user_id,
            'Testing Scope': testing_scope,
            'Testing Scope Status': testing_scope_status,
            'Test Case ID': test_case_id,
            'Test Case': test_case,
            'Test Case Status': test_case_status,
            'Findings (Affected Components)': affected_components,
            'Severity Ratings': severity_rating
        }
        test_cases.append(test_case_entry)

    df = pd.DataFrame(test_cases)
    return df

In [17]:
if __name__ == "__main__":
    try:
        crm_projects = pd.read_csv("projects_data.csv")
        existing_project_ids = crm_projects["ProjectID"].unique().tolist()
    except FileNotFoundError:
        print("No CRM project file found; using dummy ProjectIDs.")
        existing_project_ids = None
    
    test_cases_df = generate_refined_test_cases(
        num_random_cases=800,
        num_not_completed_cases=100,
        project_ids=existing_project_ids,
        user_ids=None,
        seed=42
    )
    print("\n--- Sample Refined Test Cases ---")
    print(test_cases_df.head())
    
    test_cases_df.to_csv("refined_security_test_cases.csv", index=False)
    print("\nSaved 'refined_security_test_cases.csv'.")

No CRM project file found; using dummy ProjectIDs.

--- Sample Refined Test Cases ---
  Project ID Project Name   User ID Testing Scope Testing Scope Status  \
0    PRJ-082  Test Case 1  USER-015           WEB          Not started   
1    PRJ-087  Test Case 2  USER-095           WEB          Not started   
2    PRJ-030  Test Case 3  USER-065           WEB          In progress   
3    PRJ-001  Test Case 4  USER-098           WEB          In progress   
4    PRJ-098  Test Case 5  USER-044           WEB          In progress   

  Test Case ID          Test Case Test Case Status  \
0       TC-001  User profile page        Completed   
1       TC-002         Login form        Completed   
2       TC-003  User profile page      Not Started   
3       TC-004    Payment gateway        Completed   
4       TC-005         Login form      Not Started   

  Findings (Affected Components) Severity Ratings  
0              User profile page              Low  
1                     Login form        

In [18]:
# use of matching Function (match_logs_to_tests) -- e.g, to match and link the testing dataset to logs 

# A new function match_logs_to_tests() will be added to processes the log data and matches each log entry to its corresponding test case based on keywords in the log event and test case categories.
# Matching Logic: The function looks for matches between log events (like "login", "SQL injection") and test case categories (like "Authentication Testing" or MITRE attack techniques like "T1190")

# Also require adding of things after liasing with Team 2
# Testing requirements: 