In [1]:
from google.colab import drive
import csv
import re

# Mount Google Drive
drive.mount('/content/drive')

# Function to extract CWE ID from the RCM Response
def extract_cwe_from_rcm(response_text):
    # Look for the pattern 'CWE-ID:' or 'CWE ID: CWE-XXX' in the generated response
    match = re.search(r'CWE[-\s]?ID[:]*\s*CWE[-\s]?(\d+)', response_text, re.IGNORECASE)
    if match:
        return match.group(1)  # Return the matched CWE ID (numeric part)
    return None

# Function to calculate accuracy by comparing the CWE IDs
def calculate_cwe_accuracy(file_path, delimiter=','):
    match_count = 0
    total_count = 0

    # Open the file and process each row
    with open(file_path, mode='r') as file:
        reader = csv.DictReader(file, delimiter=delimiter)

        # Loop through each row to compare CWE ID in GT and Generated RCM Response
        for row in reader:
            gt_cwe = row['GT'].strip()  # Get the CWE ID from GT column
            rcm_response = row['Generated RCM Response'].strip()  # Get the RCM response

            # Extract the CWE ID from the RCM response
            generated_cwe = extract_cwe_from_rcm(rcm_response)

            # Compare CWE IDs
            if gt_cwe and generated_cwe and gt_cwe == f"CWE-{generated_cwe}":
                match_count += 1
            total_count += 1

    # Calculate accuracy
    if total_count > 0:
        accuracy = (match_count / total_count) * 100
        print(f"Total Cases: {total_count}")
        print(f"Matching CWE IDs: {match_count}")
        print(f"Accuracy: {accuracy:.2f}%")
    else:
        print("No data found in the file.")

# Example usage
file_path = '/content/drive/MyDrive/rcm-output-results.csv'  # Replace with your actual file path
calculate_cwe_accuracy(file_path)


Mounted at /content/drive
Total Cases: 1000
Matching CWE IDs: 33
Accuracy: 3.30%
