In [None]:
# For Pl  -- TODO: incomplete note; commented out so this cell does not raise a SyntaxError on Run All

In [None]:
# Import libraries
import pandas as pd
import random
from datetime import datetime, timedelta

In [None]:
# Define OWASP WSTG Categories (updated with specified categorisation)
# Maps a CamelCase testing-category name to an identifier of the form
# "WSTG-<ABBR>-<NN>", numbered 01-12 in declaration order.
# NOTE(review): several abbreviations here (e.g. IGV, IDM, DV, API) do not look
# like the category codes published in the OWASP Web Security Testing Guide
# (INFO, IDNT, INPV, APIT). Presumably this is the intentional project-specific
# scheme mentioned above -- confirm before joining against external WSTG data.
OWASP_WSTG_CATEGORIES = {
    "InformationGathering": "WSTG-IGV-01",
    "ConfigurationDeploymentManagement": "WSTG-CONF-02",
    "IdentityManagement": "WSTG-IDM-03",
    "AuthenticationTesting": "WSTG-ATHN-04",
    "AuthorizationTesting": "WSTG-ATHZ-05",
    "SessionManagementTesting": "WSTG-SESS-06",
    "DataValidationTesting": "WSTG-DV-07",
    "ErrorHandling": "WSTG-ERRH-08",
    "CryptographyTesting": "WSTG-CRYP-09",
    "BusinessLogicTesting": "WSTG-BUSL-10",
    "ClientSideTesting": "WSTG-CLNT-11",
    "APITesting": "WSTG-API-12"
}

In [None]:
# Define MITRE ATT&CK Technique IDs as a dictionary (Selected based on relevance - NOT all are here)
# Keys are ATT&CK technique IDs and values are the technique names; the trailing
# comment on each entry records why that technique was selected.

MITRE_TECHNIQUES = {
    "T1190": "Exploit Public-Facing Application",  # Targets OWASP Injection, XSS, and RCE vulnerabilities
    "T1078": "Valid Accounts", # Covers credential-based attacks like brute force and credential stuffing
    "T1133": "External Remote Services", # Exploits misconfigured VPN, RDP, or SSH, aligning with OWASP Config Testing
    "T1059": "Command and Scripting Interpreter", # Tests script execution vulnerabilities (JS, Python, Bash)
    "T1087": "Account Discovery", # Focuses on enumerating user accounts and roles, relevant for Identity Testing
    "T1110": "Brute Force",  # Checks password strength, rate-limiting, and credential stuffing attacks
    "T1595": "Active Scanning", # Simulates attacker reconnaissance to identify open ports, services, and vulnerabilities
    "T1195": "Supply Chain Compromise", # Targets dependencies, 3rd-party software, and package vulnerabilities
    "T1203": "Exploitation for Client Execution",  # Examines drive-by downloads, malicious script execution
    "T1566": "Phishing" # Tests social engineering scenarios related to email-based attacks
}

In [None]:
# Define Compliance Frameworks (Random selection for Vantage Point Security Relevance)
COMPLIANCE_FRAMEWORKS = ["CREST Penetration Testing", "NIST 800-53", "MAS TRM (Technology Risk Management)"]

# CREST Penetration Testing - Recognized industry standard for security testing.
# NIST 800-53 - A key framework for security and risk management.
# MAS TRM (Technology Risk Management) - Critical for financial institutions in Singapore.

In [None]:
# Define CVSS score categories - Industry standards
# Each value is "<base score> - <CVSS v3.1 vector string>" keyed by severity label.
# NOTE(review): only the "Critical" pair looks self-consistent. The High, Medium
# and Low vectors appear to evaluate to different base scores under the CVSS v3.1
# equations (roughly 8.8, 5.0 and 2.9 respectively) -- verify with a CVSS
# calculator and correct either the score or the vector.
CVSS_SCORES = {
    "Critical": "9.8 - CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
    "High":    "7.5 - CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H",
    "Medium":  "5.4 - CVSS:3.1/AV:N/AC:H/PR:N/UI:R/S:U/C:L/I:L/A:L",
    "Low":     "3.2 - CVSS:3.1/AV:L/AC:H/PR:N/UI:N/S:U/C:L/I:N/A:N"
}

# How to read a vector string, using the "Critical" entry as the example:
#   9.8      -> CVSS base score
#   CVSS:3.1 -> version of the CVSS specification
#   AV:N     -> Attack Vector: Network
#   AC:L     -> Attack Complexity: Low
#   PR:N     -> Privileges Required: None
#   UI:N     -> User Interaction: None
#   S:U      -> Scope: Unchanged
#   C:H      -> Confidentiality Impact: High
#   I:H      -> Integrity Impact: High
#   A:H      -> Availability Impact: High

In [None]:
# Define possible values for testing scope, test case, affected component, and severity.
# generate_refined_test_cases() draws one value from each list per generated row.
TESTING_SCOPES = ["WEB", "APP"]
TEST_CASES = ["Authentication", "Authorization", "Input Validation", "SQL Injection", "XSS"]  # based on common OWASP methodologies and security testing categories
AFFECTED_COMPONENTS = ["Login form", "User profile page", "Payment gateway", "Database", "Session management"]
SEVERITY_RATINGS = ["Low", "Medium", "High", "Critical"]

In [None]:
# Function to generate refined test cases
def generate_refined_test_cases(num_random_cases=800, num_not_completed_cases=100, project_ids=None, user_ids=None, seed=None):
    """Build a DataFrame of synthetic security test cases.

    Rows are numbered consecutively from 1. The first ``num_random_cases``
    rows are followed by ``num_not_completed_cases`` further rows; both
    batches are generated in exactly the same way, and nothing in the output
    marks a row as belonging to the second batch.

    Parameters
    ----------
    num_random_cases : int
        Size of the first batch (row numbers ``1..num_random_cases``).
    num_not_completed_cases : int
        Size of the second batch, numbered directly after the first.
    project_ids : list of str, optional
        Pool to draw 'Project ID' from. When empty or None, the dummy IDs
        ``PRJ-001`` .. ``PRJ-099`` are used.
    user_ids : list of str, optional
        Pool to draw 'User ID' from. When empty or None, the dummy IDs
        ``USER-001`` .. ``USER-099`` are used.
    seed : int, optional
        When given, seeds the module-level ``random`` generator so the output
        is reproducible. Note that this reseeds global state.

    Returns
    -------
    pandas.DataFrame
        One row per test case with the columns 'Project ID', 'Project Name',
        'User ID', 'Testing Scope', 'Test Case ID', 'Test Case',
        'Findings (Affected Components)', 'Severity Ratings' and 'Deadline'
        (a ``DD-Mon-YYYY`` string in October 2024). The columns are present
        even when zero rows are requested.
    """
    if seed is not None:
        random.seed(seed)

    if not project_ids:
        project_ids = [f"PRJ-{i:03}" for i in range(1, 100)]

    if not user_ids:
        user_ids = [f"USER-{i:03}" for i in range(1, 100)]

    columns = [
        'Project ID',
        'Project Name',
        'User ID',
        'Testing Scope',
        'Test Case ID',
        'Test Case',
        'Findings (Affected Components)',
        'Severity Ratings',
        'Deadline',
    ]

    # Deadline window 01-Oct-2024 .. 31-Oct-2024 (inclusive). Built once with
    # datetime() rather than re-parsing "%b" strings for every row, which is
    # both repeated work and dependent on the process locale.
    window_start = datetime(2024, 10, 1)
    window_days = (datetime(2024, 10, 31) - window_start).days

    def random_deadline():
        # Helper: random deadline inside the window, formatted DD-Mon-YYYY
        random_days = random.randint(0, window_days)
        return (window_start + timedelta(days=random_days)).strftime("%d-%b-%Y")

    def build_entry(i):
        # Helper: one test-case row for sequence number i.
        # The order of the random draws below must not change: it determines
        # which values a given seed produces.
        project_id = random.choice(project_ids)
        user_id = random.choice(user_ids)
        testing_scope = random.choice(TESTING_SCOPES)
        test_case = random.choice(TEST_CASES)
        affected_components = random.choice(AFFECTED_COMPONENTS)
        severity_rating = random.choice(SEVERITY_RATINGS)
        deadline = random_deadline()
        return {
            'Project ID': project_id,
            'Project Name': f"Test Case {i}",
            'User ID': user_id,
            'Testing Scope': testing_scope,
            'Test Case ID': f"TC-{i:03}",
            'Test Case': test_case,
            'Findings (Affected Components)': affected_components,
            'Severity Ratings': severity_rating,
            'Deadline': deadline,
        }

    # The two batches share one builder; their index ranges are kept separate
    # so row numbering matches the original two-loop version for every input.
    index_ranges = (
        range(1, num_random_cases + 1),
        range(num_random_cases + 1, num_random_cases + num_not_completed_cases + 1),
    )
    test_cases = [build_entry(i) for indices in index_ranges for i in indices]

    # Passing columns explicitly keeps the schema even when no rows were generated.
    return pd.DataFrame(test_cases, columns=columns)

In [None]:
if __name__ == "__main__":
    # Draw project IDs from the CRM export when it is available; with None the
    # generator falls back to its built-in dummy IDs.
    existing_project_ids = None
    try:
        crm_projects = pd.read_csv("projects_data.csv")
    except FileNotFoundError:
        print("No CRM project file found; using dummy ProjectIDs.")
    else:
        existing_project_ids = crm_projects["ProjectID"].unique().tolist()

    # 800 + 100 rows with a fixed seed so the generated CSV is reproducible.
    test_cases_df = generate_refined_test_cases(
        800,
        100,
        project_ids=existing_project_ids,
        user_ids=None,
        seed=42,
    )

    # Preview the first rows, then persist the full table for the later cells.
    print("\n--- Sample Refined Test Cases ---", test_cases_df.head(), sep="\n")

    test_cases_df.to_csv("refined_security_test_cases.csv", index=False)
    print("\nSaved 'refined_security_test_cases.csv'.")

## Adding Important Matching Keys to the Burp Suite Sample Logs

In [None]:
import pandas as pd
import re

# Upper bound on how many log entries (and therefore CSV rows) get annotated
MAX_ANNOTATED_ENTRIES = 800

# Load the first MAX_ANNOTATED_ENTRIES rows from the CSV
csv_path = "refined_security_test_cases.csv"
df = pd.read_csv(csv_path).head(MAX_ANNOTATED_ENTRIES)

# Load logs.txt
with open("logs.txt", "r", encoding="utf-8") as f:
    original_log = f.read()

# Identify HTTP request lines (GET, POST, PUT, DELETE) for each log entry to insert the new fields
request_line_pattern = re.compile(r"^(POST|GET|PUT|DELETE) .*$", re.MULTILINE)
matches = list(request_line_pattern.finditer(original_log))

# Pair the i-th request line with the i-th CSV row. zip() stops at the shorter
# side, so a CSV with fewer rows than the log has request lines no longer
# raises IndexError; the surplus request lines are simply left unannotated.
key_rows = df[["Project ID", "User ID", "Test Case ID"]].itertuples(index=False, name=None)
injections = []
for match, (project_id, user_id, test_case_id) in zip(matches[:MAX_ANNOTATED_ENTRIES], key_rows):
    insert_pos = match.end()  # end of the matched request line
    insert_text = f"\nProject ID: {project_id}\nUser ID: {user_id}\nTest Case ID: {test_case_id}"
    injections.append((insert_pos, insert_text))

if len(injections) < min(len(matches), MAX_ANNOTATED_ENTRIES):
    print(f"Only {len(injections)} request lines annotated: the CSV has fewer rows than the log has request lines.")

# Assemble the annotated log in one pass. finditer() yields matches in
# ascending position, so copying the original text up to each insertion point
# and then the inserted text gives the same result as inserting back-to-front,
# without re-copying the whole log for every insertion.
segments = []
cursor = 0
for pos, text in injections:
    segments.append(original_log[cursor:pos])
    segments.append(text)
    cursor = pos
segments.append(original_log[cursor:])
final_log = "".join(segments)

# Save the updated log file
output_path = "updated_logs.txt"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(final_log)

print(f"Updated log saved to: {output_path}")