In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Set the device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model name
model_name = "meta-llama/Llama-3.2-1B"

# Load the tokenizer and model with the Hugging Face token
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=os.getenv("HF_TOKEN"))
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    use_auth_token=os.getenv("HF_TOKEN")
).to(device)


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [None]:
import pandas as pd


df=pd.read_csv('/content/cve_cwe_dataset_filtered.tsv', sep='\t')
df.head()

Unnamed: 0,CVE ID,CVE Description,CWE ID,Explanation,Description Length
0,CVE-2024-47188,Suricata is a network Intrusion Detection Syst...,CWE-330,The CVE describes a vulnerability: 'Suricata i...,63.0
1,CVE-2024-45797,LibHTP is a security-aware parser for the HTTP...,CWE-770,The CVE describes a vulnerability: 'LibHTP is ...,47.0
2,CVE-2024-45796,Suricata is a network Intrusion Detection Syst...,CWE-193,The CVE describes a vulnerability: 'Suricata i...,48.0
3,CVE-2024-45795,Suricata is a network Intrusion Detection Syst...,CWE-617,The CVE describes a vulnerability: 'Suricata i...,57.0
4,CVE-2024-41128,Action Pack is a framework for handling and re...,CWE-770,The CVE describes a vulnerability: 'Action Pac...,120.0


In [None]:
import re
import torch
import pandas as pd

def generate_gt(cve_description):
    """Generates the CWE mapping for a given CVE description using LLaMA."""
    try:
        # Set the pad_token if not already set
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id  # Use eos_token_id as pad_token_id

        # Ensure the model is using the device
        model.to(device)

        # The obligatory prompt
        prompt = (f"Analyze the following CVE description and determine the appropriate CWE. "
                  f"Provide a justification. The last part of your response should contain only the CWE ID number.\n\n"
                  f"CVE Description: {cve_description}\n")

        # Tokenize the prompt and move inputs to the correct device
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

        # Set the pad_token_id for the model
        model.config.pad_token_id = tokenizer.pad_token_id

        # Generate response with a dynamic length limit
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=min(2000, inputs["input_ids"].shape[1] + 50),  # Be conservative with length
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id
        )

        # Decode the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Improved CWE ID extraction
        cwe_id_match = re.search(r'CWE-\d+', response)
        cwe_id = cwe_id_match.group(0) if cwe_id_match else None

        return {
            "full_response": response,
            "cwe_id": cwe_id
        }

    except Exception as e:
        print(f"Error generating CWE mapping: {str(e)}")
        return None

# Assuming `df` is your DataFrame containing the CVE descriptions
# Sample 200 random rows from the DataFrame
sampled_df = df.sample(n=200, random_state=42)  # Initial random sample
results = []
max_retries = 3
target_successful_answers = 200  # We want 200 valid answers
attempted_indices = set()  # Track processed indices to avoid duplicates

# Loop until we reach the target number of valid answers
while len(results) < target_successful_answers:
    # Pick another entry if needed, ensuring we haven't processed it before
    for index, row in sampled_df.iterrows():
        if index in attempted_indices:
            continue  # Skip already processed entries
        attempted_indices.add(index)  # Mark this entry as processed
        cve_description = row["CVE Description"]
        print(f"Processing CVE Description {index + 1}/{len(sampled_df)}: {cve_description}")  # Progress message

        # Attempt to get a valid answer with retries if necessary
        for attempt in range(max_retries):
            result = generate_gt(cve_description)

            # Check if a valid CWE ID was found
            if result and result['cwe_id'] is not None:
                print(f"Success: {result['cwe_id']} - {result['full_response']}")  # Successful result message
                # Append result to the list
                results.append({
                    "CVE Description": cve_description,
                    "Justification": result['full_response'],
                    "CWE_ID": result['cwe_id']
                })
                break  # Break out of retry loop if successful
            else:
                print(f"Retry {attempt + 1}/{max_retries} for CVE Description {index + 1}...")  # Retry message

        # Stop processing if we’ve reached the desired count of valid answers
        if len(results) >= target_successful_answers:
            break

    # If after retries the answer is still invalid, move on to the next entry
    if not result or result['cwe_id'] is None:
        print("Skipping entry after 3 unsuccessful attempts.")
        continue

    # Save the current results to a TSV file after each new valid entry is added
    results_df = pd.DataFrame(results)
    results_df.to_csv("resultsllamacti.tsv", sep='\t', index=False)

# Final save to ensure all results are written to the file
print("Finalizing and saving results...")
results_df.to_csv("resultsllama.tsv", sep='\t', index=False)
print("Results saved to 'resultsllamacti.tsv'.")


Processing CVE Description 136/200: Sakai is a Collaboration and Learning Environment. Starting in version 23.0 and prior to version 23.2, kernel users created with type roleview can log in as a normal user. This can result in illegal access being granted to the system. Version 23.3 fixes this vulnerability.
Success: CWE-284 - Analyze the following CVE description and determine the appropriate CWE. Provide a justification. The last part of your response should contain only the CWE ID number.

CVE Description: Sakai is a Collaboration and Learning Environment. Starting in version 23.0 and prior to version 23.2, kernel users created with type roleview can log in as a normal user. This can result in illegal access being granted to the system. Version 23.3 fixes this vulnerability.
CWE ID: CWE-284 (Improper Access Control)
The CWE-284 describes improper access control as follows:
A program or component that allows a user with the correct credentials to access a resource that should only be

KeyboardInterrupt: 

In [12]:
import pandas as pd

# Load the TSV files
llama_results_df = pd.read_csv('/content/resultsllamamydata2.tsv', sep='\t')
ground_truth_df = pd.read_csv('/content/cve_cwe_dataset_filtered.tsv', sep='\t')

# Merge dataframes on 'CVE Description' from both files
merged_df = pd.merge(llama_results_df, ground_truth_df, on="CVE Description", how="inner")

# Calculate accuracy by comparing 'CWE_ID' (predictions) with 'CWE ID' (ground truth)
accuracy = (merged_df['CWE_ID'] == merged_df['CWE ID']).mean()

# Print accuracy as a percentage
print("Accuracy:", accuracy * 100, "%")


Accuracy: 10.236220472440944 %


In [None]:
import pandas as pd


df=pd.read_csv('/content/cti-rcm.tsv', sep='\t')
df.head()

Unnamed: 0,URL,Description,Prompt,GT
0,https://nvd.nist.gov/vuln/detail/CVE-2024-23848,"In the Linux kernel through 6.7.1, there is a ...",Analyze the following CVE description and map ...,CWE-416
1,https://nvd.nist.gov/vuln/detail/CVE-2023-38738,IBM OpenPages with Watson 8.3 and 9.0 could pr...,Analyze the following CVE description and map ...,CWE-257
2,https://nvd.nist.gov/vuln/detail/CVE-2024-22137,Improper Neutralization of Input During Web Pa...,Analyze the following CVE description and map ...,CWE-79
3,https://nvd.nist.gov/vuln/detail/CVE-2024-20819,Out-of-bounds Write vulnerabilities in svc1td_...,Analyze the following CVE description and map ...,CWE-787
4,https://nvd.nist.gov/vuln/detail/CVE-2024-0585,The Essential Addons for Elementor – Best Elem...,Analyze the following CVE description and map ...,CWE-79


In [None]:
import re
import torch
import pandas as pd

def generate_gt(cve_description):
    """Generates the CWE mapping for a given CVE description using LLaMA."""
    try:
        # Ensure model and tokenizer are using the correct device
        model.to(device)

        # The obligatory prompt
        prompt = (f"Analyze the following CVE description and determine the appropriate CWE. "
                  f"Provide a justification. The last part of your response should contain only the CWE ID number.\n\n"
                  f"CVE Description: {cve_description}\n")

        # Tokenize the prompt and move inputs to the correct device
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

        # Generate response with a dynamic length limit
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=min(2000, inputs["input_ids"].shape[1] + 50),
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id
        )

        # Decode the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract CWE ID from response
        cwe_id_match = re.search(r'CWE-\d+', response)
        cwe_id = cwe_id_match.group(0) if cwe_id_match else None

        return {
            "full_response": response,
            "cwe_id": cwe_id
        }

    except Exception as e:
        print(f"Error generating CWE mapping: {str(e)}")
        return None

# Load the DataFrame containing the CVE descriptions
sampled_df = df.sample(n=200, random_state=42)  # Adjust if needed

# Initialize a list to store valid results
results = []
attempted_rows = 0

# Process each CVE description until we reach 200 valid answers
for index, row in sampled_df.iterrows():
    if len(results) >= 200:
        break  # Stop once we have 200 valid entries

    cve_description = row["Description"]
    print(f"Processing CVE Description {index + 1}/{len(sampled_df)}: {cve_description}")

    # Retry logic with a maximum of 3 attempts
    for attempt in range(3):
        result = generate_gt(cve_description)
        if result and result["cwe_id"]:  # Check if CWE ID was successfully generated
            print(f"Success: {result['cwe_id']} - {result['full_response']}")
            results.append({
                "CVE Description": cve_description,
                "Justification": result["full_response"],
                "CWE_ID": result["cwe_id"]
            })
            break  # Exit retry loop on success
        else:
            print(f"Attempt {attempt + 1} failed for this description.")

        # If all attempts failed, mark as error after third attempt
        if attempt == 2 and (not result or not result["cwe_id"]):
            print("Error: Unable to process this description after 3 attempts.")
            results.append({
                "CVE Description": cve_description,
                "Justification": "Error in processing",
                "CWE_ID": "Error"
            })

    # Increment counter for rows attempted to ensure we sample enough entries
    attempted_rows += 1

    # Save intermediate results to a TSV file after each entry
    results_df = pd.DataFrame(results)
    results_df.to_csv("resultsllamacti11.tsv", sep='\t', index=False)

# Final save after processing all entries
print("Finalizing and saving results...")
results_df.to_csv("resultsllamacti11.tsv", sep='\t', index=False)
print("Results saved to 'resultsllamacti11.tsv'.")


Processing CVE Description 522/200: A SQL injection vulnerability exists in Novel-Plus v4.3.0-RC1 and prior. An attacker can pass specially crafted offset, limit, and sort parameters to perform SQL injection via /novel/userFeedback/list.
Success: CWE-89 - Analyze the following CVE description and determine the appropriate CWE. Provide a justification. The last part of your response should contain only the CWE ID number.

CVE Description: A SQL injection vulnerability exists in Novel-Plus v4.3.0-RC1 and prior. An attacker can pass specially crafted offset, limit, and sort parameters to perform SQL injection via /novel/userFeedback/list.
CWE ID: CWE-89

Explanation: The vulnerability exists because the application does not properly validate the offset, limit, and sort parameters. An attacker can pass specially crafted offset, limit, and sort parameters to perform SQL injection via /novel
Processing CVE Description 738/200: Due to a failure in validating the number of scanline samples of 

In [5]:
!pip install fuzzywuzzy


Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [7]:
import pandas as pd

# Load the TSV files
llama_results_df = pd.read_csv('/content/resultsllamacti11.tsv', sep='\t')
ground_truth_df = pd.read_csv('/content/cti-rcm.tsv', sep='\t')


# Merge dataframes on 'CVE Description' from llama results and 'Description' from ground truth
merged_df = pd.merge(llama_results_df, ground_truth_df, left_on="CVE Description", right_on="Description", how="inner")

# Calculate accuracy by comparing 'CWE_ID' (predictions) with 'GT' (ground truth)
accuracy = (merged_df['CWE_ID'] == merged_df['GT']).mean()
print("Accuracy:", accuracy * 100, "%")

Accuracy: 28.640776699029125 %


In [None]:
import pandas as pd


df=pd.read_csv('/content/cti-vsp.tsv', sep='\t')
df.head()

Unnamed: 0,URL,Description,Prompt,GT
0,https://nvd.nist.gov/vuln/detail/CVE-2024-23848,"In the Linux kernel through 6.7.1, there is a ...",Analyze the following CVE description and calc...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H
1,https://nvd.nist.gov/vuln/detail/CVE-2023-38738,IBM OpenPages with Watson 8.3 and 9.0 could pr...,Analyze the following CVE description and calc...,CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:N
2,https://nvd.nist.gov/vuln/detail/CVE-2024-22137,Improper Neutralization of Input During Web Pa...,Analyze the following CVE description and calc...,CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N
3,https://nvd.nist.gov/vuln/detail/CVE-2024-20819,Out-of-bounds Write vulnerabilities in svc1td_...,Analyze the following CVE description and calc...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
4,https://nvd.nist.gov/vuln/detail/CVE-2024-0585,The Essential Addons for Elementor – Best Elem...,Analyze the following CVE description and calc...,CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N


In [None]:
import re
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import os

def calculate_cvss(cve_description):
    """Generates the CVSS v3.1 Base Score and vector for a given CVE description."""
    try:
        # Ensure pad_token is set
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id

        # The prompt for CVSS calculation
        prompt = (
            "Analyze the following CVE description and calculate the CVSS v3.1 Base Score. "
            "Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. "
            "Summarize each metric’s value and provide the final CVSS v3.1 vector string.\n"
            "Valid options for each metric are as follows:\n"
            "- Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)\n"
            "- Attack Complexity (AC): Low (L), High (H)\n"
            "- Privileges Required (PR): None (N), Low (L), High (H)\n"
            "- User Interaction (UI): None (N), Required (R)\n"
            "- Scope (S): Unchanged (U), Changed (C)\n"
            "- Confidentiality (C): None (N), Low (L), High (H)\n"
            "- Integrity (I): None (N), Low (L), High (H)\n"
            "- Availability (A): None (N), Low (L), High (H)\n"
            "Summarize each metric’s value and provide the final CVSS v3.1 vector string. "
            "Ensure the final line of your response contains only the CVSS v3 Vector String in the following format:\n"
            "Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H\n\n"
            f"CVE Description: {cve_description}\n"
        )

        # Tokenize and prepare inputs
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

        # Set padding token for model configuration
        model.config.pad_token_id = tokenizer.pad_token_id

        # Generate model response
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=min(2000, inputs["input_ids"].shape[1] + 50),
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id
        )

        # Decode the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract the CVSS vector string using a regex pattern
        vector_match = re.search(r'CVSS:3\.1/AV:[NALP]/AC:[LH]/PR:[NLH]/UI:[NR]/S:[UC]/C:[NLH]/I:[NLH]/A:[NLH]', response)
        vector = vector_match.group(0) if vector_match else None

        return {
            "full_response": response,
            "cvss_vector": vector
        }

    except Exception as e:
        print(f"Error calculating CVSS: {str(e)}")
        return None

# Load the CVE descriptions from the uploaded TSV file

# Sample 200 random rows from the DataFrame for processing
sampled_df = df.sample(n=200, random_state=42)

# Initialize an empty list to store results
results = []
attempted_rows = 0

# Process each CVE description until we reach 200 valid answers
for index, row in sampled_df.iterrows():
    if len(results) >= 200:
        break  # Stop once we have 200 valid entries

    cve_description = row["Description"]
    print(f"Processing CVE Description {index + 1}/{len(sampled_df)}: {cve_description}")

    # Retry logic with a maximum of 3 attempts
    for attempt in range(3):
        result = calculate_cvss(cve_description)
        if result and result["cvss_vector"]:  # Check if CVSS vector was successfully generated
            print(f"Success: {result['cvss_vector']} - {result['full_response']}")
            results.append({
                "CVE Description": cve_description,
                "CVSS_Vector": result["cvss_vector"]
            })
            break  # Exit retry loop on success
        else:
            print(f"Attempt {attempt + 1} failed for this description.")

        # If all attempts failed, mark as error after third attempt
        if attempt == 2 and (not result or not result["cvss_vector"]):
            print("Error: Unable to process this description after 3 attempts.")
            results.append({
                "CVE Description": cve_description,
                "CVSS_Vector": "Error"
            })

    # Increment counter for rows attempted to ensure we sample enough entries
    attempted_rows += 1

    # Convert results to DataFrame and save after each iteration
    results_df = pd.DataFrame(results)
    results_df.to_csv("results_cvss.tsv", sep='\t', index=False)

# Final save after processing all entries
print("Finalizing and saving results...")
results_df.to_csv("results_cvss_cti.tsv", sep='\t', index=False)
print("Results saved to 'results_cvss_cti.tsv'.")


Processing CVE Description 522/200: A SQL injection vulnerability exists in Novel-Plus v4.3.0-RC1 and prior. An attacker can pass specially crafted offset, limit, and sort parameters to perform SQL injection via /novel/userFeedback/list.
Success: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H - Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric’s value and provide the final CVSS v3.1 vector string.
Valid options for each metric are as follows:
- Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)
- Attack Complexity (AC): Low (L), High (H)
- Privileges Required (PR): None (N), Low (L), High (H)
- User Interaction (UI): None (N), Required (R)
- Scope (S): Unchanged (U), Changed (C)
- Confidentiality (C): None (N), Low (L), High (H)
- Integrity (I): None (N), Low (L), High (H)
- Availability (A): None (N), Low (L), High (H)
Summarize each metric’s v

In [8]:
import pandas as pd

# Load the TSV files
llama_results_df = pd.read_csv('/content/results_cvss_cti (1).tsv', sep='\t')
ground_truth_df = pd.read_csv('/content/cti-vsp.tsv', sep='\t')

# Merge dataframes on 'CVE Description' from results and 'Description' from ground truth
merged_df = pd.merge(llama_results_df, ground_truth_df, left_on="CVE Description", right_on="Description", how="inner")

# Calculate accuracy by comparing 'CVSS_Vector' (predictions) with 'GT' (ground truth)
accuracy = (merged_df['CVSS_Vector'] == merged_df['GT']).mean()

# Print accuracy as a percentage
print("Accuracy:", accuracy * 100, "%")


Accuracy: 20.689655172413794 %


In [None]:
import pandas as pd


df1=pd.read_csv('/content/cve_dataset_with_cvss_vector.tsv', sep='\t')
df1.head()

Unnamed: 0,CVE ID,Description,CVSS Vector
0,CVE-2024-38424,Memory corruption during GNSS HAL process init...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
1,CVE-2024-38423,Memory corruption while processing GPU page ta...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
2,CVE-2024-38422,Memory corruption while processing voice packe...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
3,CVE-2024-38421,Memory corruption while processing GPU commands.,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
4,CVE-2024-38419,Memory corruption while invoking IOCTL calls f...,CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H


In [None]:
import re
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import os



# Function to calculate CVSS from CVE description
def calculate_cvss(cve_description):
    """Generates the CVSS v3.1 Base Score and vector for a given CVE description."""
    try:
        # The prompt for CVSS calculation
        prompt = (
            "Analyze the following CVE description and calculate the CVSS v3.1 Base Score. "
            "Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. "
            "Summarize each metric’s value and provide the final CVSS v3.1 vector string.\n"
            "Valid options for each metric are as follows:\n"
            "- Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)\n"
            "- Attack Complexity (AC): Low (L), High (H)\n"
            "- Privileges Required (PR): None (N), Low (L), High (H)\n"
            "- User Interaction (UI): None (N), Required (R)\n"
            "- Scope (S): Unchanged (U), Changed (C)\n"
            "- Confidentiality (C): None (N), Low (L), High (H)\n"
            "- Integrity (I): None (N), Low (L), High (H)\n"
            "- Availability (A): None (N), Low (L), High (H)\n"
            "Summarize each metric’s value and provide the final CVSS v3.1 vector string. "
            "Ensure the final line of your response contains only the CVSS v3 Vector String in the following format:\n"
            "Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H\n\n"
            f"CVE Description: {cve_description}\n"
        )

        # Tokenize and prepare inputs
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

        # Set padding token for model configuration
        model.config.pad_token_id = tokenizer.pad_token_id

        # Generate model response
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=min(2000, inputs["input_ids"].shape[1] + 50),
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id
        )

        # Decode the generated text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract the CVSS vector string using a regex pattern
        vector_match = re.search(r'CVSS:3\.1/AV:[NALP]/AC:[LH]/PR:[NLH]/UI:[NR]/S:[UC]/C:[NLH]/I:[NLH]/A:[NLH]', response)
        vector = vector_match.group(0) if vector_match else "CVSS vector not found"

        return {
            "full_response": response,
            "cvss_vector": vector
        }

    except Exception as e:
        print(f"Error calculating CVSS: {str(e)}")
        return None

# Load the CVE descriptions from the uploaded TSV file
df1 = pd.read_csv("path/to/your/cve_data.tsv", sep="\t")  # Modify with the actual path of your TSV file

# Sample 200 random rows from the DataFrame for processing
sampled_df = df1.sample(n=200, random_state=42)

# Initialize an empty list to store results
results = []

# Iterate over the sampled rows and generate CVSS mappings
for index, row in sampled_df.iterrows():
    cve_description = row["Description"]
    print(f"Processing CVE Description {index + 1}/{len(sampled_df)}: {cve_description}")
    result = calculate_cvss(cve_description)
    if result:
        print(f"Success: {result['cvss_vector']} - {result['full_response']}")
        # Append successful result
        results.append({
            "CVE Description": cve_description,
            "CVSS_Vector": result['cvss_vector']
        })
    else:
        print("Error: Unable to process this description.")
        # Append error result
        results.append({
            "CVE Description": cve_description,
            "CVSS_Vector": "Error"
        })

    # Convert results to DataFrame and save after each iteration
    results_df = pd.DataFrame(results)
    results_df.to_csv("results_cvss_mydata.tsv", sep='\t', index=False)

# Final save
print("Finalizing and saving results...")
results_df.to_csv("results_cvss_mydata.tsv", sep='\t', index=False)
print("Results saved to 'results_cvss_mydata.tsv'.")


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/your/cve_data.tsv'