# Libraries

In [None]:
import pandas as pd
import numpy as np
import time
import random
from tqdm import tqdm
import google.generativeai as genai
import transformers
import torch
from huggingface_hub import login
from google.colab import userdata


# cti-mcq

In [None]:
# Load the two tab-separated tables used by every cell below: the benchmark
# questions (`cti_mcq`) and the previously recorded model answers
# (`cti_mcq_responses`).
cti_mcq, cti_mcq_responses = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in ('cti-mcq.tsv', 'cti-mcq-responses.tsv')
)

## Gemini-1.5

In [None]:
# Spot-check the recorded Gemini answers: re-ask Gemini a random sample of
# questions and measure how often the fresh answer matches the recorded one.
gemini_key = userdata.get('gemini')

genai.configure(api_key=gemini_key)

model = genai.GenerativeModel('gemini-1.5-pro')

# Check gemini responses
gemini_responses = cti_mcq_responses["Gemini-1.5"]

VALID_ANSWERS = ("A", "B", "C", "D")
MAX_ATTEMPTS = 5       # give up on a question after this many failed requests
THROTTLE_SECONDS = 60  # pause between requests to stay under the API rate limit

nb = 0
correct = 0
for _ in tqdm(range(40)):
    # randrange excludes the upper bound, so the row label is always valid
    # (random.randint(0, 2500) could return 2500, one past the last row).
    row = random.randrange(len(cti_mcq))
    prompt = cti_mcq["Prompt"][row] + " return only the letter (A, B, C, or D) with no additional text."

    answer = None
    for _attempt in range(MAX_ATTEMPTS):
        try:
            # Both the request itself (quota / transient API errors) and the
            # `.text` accessor (blocked or empty response) can raise.
            response = model.generate_content(prompt)
            candidate = response.text.strip()[:1]
        except Exception as err:  # retry boundary; lets KeyboardInterrupt through
            print("Gemini request failed for row", row, "-", repr(err))
        else:
            if candidate in VALID_ANSWERS:
                answer = candidate
                break
        # Throttle before retrying, whether the call failed or the reply did
        # not start with a valid letter.
        time.sleep(THROTTLE_SECONDS)

    if answer is None:
        # Never got a usable answer; leave this question out of the statistics.
        print("Skipping row", row, "after", MAX_ATTEMPTS, "attempts")
        continue

    if answer == gemini_responses[row]:
        correct += 1
    else:
        # Printed columns: prompt, fresh answer, recorded answer, ground truth.
        print(cti_mcq["Prompt"][row], answer, gemini_responses[row], cti_mcq["GT"][row])
    nb += 1

    time.sleep(THROTTLE_SECONDS)


# NOTE: this is the agreement rate with the recorded Gemini answers, not the
# accuracy against the ground truth.
if nb:
    print(correct/nb * 100, "% correct answer for gemini")
else:
    print("No Gemini answer could be collected")

 12%|█▎        | 5/40 [05:14<36:44, 63.00s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** According to the example instances provided in CAPEC-24, what is one possible consequence of leveraging a buffer overflow to make a filter fail in a web application?  **Options:** A) Executing unauthorized commands B) Destroying log files C) Bypassing authentication mechanisms D) Accessing confidential files  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  A C B


 25%|██▌       | 10/40 [10:27<31:18, 62.60s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is the consequence of calling a thread's run() method directly instead of using the start() method according to CWE-572?  **Options:** A) The code runs in the thread of the callee instead of the caller. B) The code runs in a new, separate thread created by the system. C) The code runs in the thread of the caller instead of the callee. D) The code does not run at all.  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  C A C


 30%|███       | 12/40 [12:32<29:12, 62.59s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which command is likely used by adversaries to discover open application windows as mentioned in the detection section?  **Options:** A) GetSystemWindows B) GetWindowList C) GetForegroundWindow D) GetProcessA  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  B C C


 32%|███▎      | 13/40 [13:35<28:10, 62.61s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which technique does the ID T1033 pertain to in the MITRE ATT&CK framework?  **Options:** A) System Information Discovery B) Account Discovery C) System Owner/User Discovery D) Remote System Discovery  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  C A C


 38%|███▊      | 15/40 [15:42<26:15, 63.03s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which of the following is NOT a phase to consider in mitigating CWE-1255?  **Options:** A) Implementation B) Integration C) Testing D) Architecture and Design  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  C D C


 52%|█████▎    | 21/40 [21:59<19:51, 62.74s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which malicious software mentioned in MITRE ATT&CK T1417.001 is capable of using web injects to capture user credentials?  **Options:** A) Windshift B) Escobar C) EventBot D) Exobot  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  B D D


 72%|███████▎  | 29/40 [30:18<11:26, 62.39s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which mobile malware example in T1406 base64 encodes its malicious functionality at runtime from an RC4-encrypted TTF file?  **Options:** A) Cerberus B) EventBot C) HenBox D) WolfRAT  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  D C B


 82%|████████▎ | 33/40 [34:30<07:20, 62.87s/it]

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** How does BLUELIGHT exfiltrate data according to T1041?  **Options:** A) HTTP POST requests B) External C2 server C) Gratuitous ARP responses D) Temporal precision timing attacks  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text.  A B B


100%|██████████| 40/40 [41:47<00:00, 62.70s/it]

80.0 % correct answer for gemini





## LLAMA3-8B

In [None]:
# Authenticate against the Hugging Face Hub with the token stored in the
# Colab secret named 'HF'.
HF = userdata.get('HF')
login(HF)

# Build a text-generation pipeline for Llama 3 8B Instruct, loaded in
# bfloat16 with automatic device placement.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs={"torch_dtype": torch.bfloat16},
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Spot-check the recorded Llama 3 8B answers: re-ask the local model a random
# sample of questions and count how often the fresh answer matches the
# recorded one.
correct = 0
n_samples = 100

# Stop generation at either the regular EOS token or Llama 3's end-of-turn
# token. Loop-invariant, so built once.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

for _ in tqdm(range(n_samples)):
    # randrange excludes the upper bound, so the row label is always valid
    # (random.randint(0, 2500) could return 2500, one past the last row).
    row = random.randrange(len(cti_mcq))

    # f-string formatting also copes with option cells that pandas parsed as
    # non-strings; the two spaces before **Options:** match the separator used
    # by the dataset's own prompts.
    system = (
        "You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. "
        "Your task is to choose the best option among the four provided. "
        "Return your answer as a single uppercase letter: A, B, C, or D.  "
        f"**Question:** {cti_mcq['Question'][row]}  "
        f"**Options:** A) {cti_mcq['Option A'][row]} B) {cti_mcq['Option B'][row]} "
        f"C) {cti_mcq['Option C'][row]} D) {cti_mcq['Option D'][row]}  "
        "**Important:** The answer should contain only the single letter corresponding to the best option, with no additional text. "
    )

    # The whole task is sent as the system message; there is no user turn.
    messages = [
        {"role": "system", "content": system},
    ]

    outputs = pipeline(
        messages,
        max_new_tokens=10,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.1,
        top_p=0.9,
    )

    # The last chat message is the model's reply. Take its first non-blank
    # character; an empty reply yields "" instead of raising IndexError.
    answer = outputs[0]["generated_text"][-1]["content"].strip()[:1]
    if answer == cti_mcq_responses["LLAMA3-8B"][row]:
        correct += 1
    else:
        # Printed columns: prompt, fresh answer, recorded answer, ground truth.
        print(system, answer, cti_mcq_responses["LLAMA3-8B"][row], cti_mcq_responses["GT"][row])

# Report a real percentage so the figure stays right if n_samples changes.
print(correct / n_samples * 100, "% of correct answers for Llama 3 8B")


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  1%|          | 1/100 [00:02<04:46,  2.89s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** The CAPEC-55 attack pattern primarily threatens which aspect of a system's security?**Options:** A) Integrity and Non-Repudiation. B) Availability and Redundancy. C) Confidentiality and Access Control. D) Physical Security and Compliance.  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C A C


  2%|▏         | 2/100 [00:05<04:39,  2.85s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  3%|▎         | 3/100 [00:08<04:26,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is a common consequence of exploiting weakness CWE-1265?**Options:** A) Protected data access B) Unexpected state C) Software performance improvement D) Enhanced user interface  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A B


  4%|▍         | 4/100 [00:11<04:27,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is the primary risk described in the MITRE ATT&CK technique T1548.005 for cloud environments?**Options:** A) Temporary loss of data access B) Unauthorized resource allocation C) Persistent escalation of privileges D) Temporary escalation of privileges  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  D C D


  5%|▌         | 5/100 [00:13<04:24,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which group, as per the procedure examples, has breached managed service providers to deliver malware to their customers?**Options:** A) GOLD SOUTHFIELD (G0115) B) Sandworm Team (G0034) C) APT29 (G0016) D) menuPass (G0045)  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A A


  6%|▌         | 6/100 [00:17<04:45,  3.04s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  7%|▋         | 7/100 [00:20<04:38,  2.99s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  8%|▊         | 8/100 [00:23<04:26,  2.90s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  9%|▉         | 9/100 [00:25<04:20,  2.86s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 10%|█         | 10/100 [00:28<04:13,  2.82s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which mitigation strategy is NOT suggested for preventing forced integer overflow according to CAPEC-92?**Options:** A) Using a language or compiler with automatic bounds checking B) Abstracting away risky APIs C) Always encrypting integer values before use D) Manual or automated code review  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  A C C


 11%|█         | 11/100 [00:31<04:08,  2.80s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 12%|█▏        | 12/100 [00:34<04:07,  2.81s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** The weakness CWE-1235 is related to which of the following impacts?**Options:** A) SQL Injection B) Weak cryptographic algorithms C) Denial of Service (DoS) D) Privilege Escalation  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A C


 13%|█▎        | 13/100 [00:36<03:53,  2.68s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 14%|█▍        | 14/100 [00:39<03:50,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 15%|█▌        | 15/100 [00:41<03:49,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In CWE-424, what technical impact might result from the product not protecting all possible paths to access restricted functionality?**Options:** A) Denial of Service (DoS) B) Breach of Information Confidentiality C) Bypass Protection Mechanism D) Propagation of Malware  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C C


 16%|█▌        | 16/100 [00:44<03:46,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which strategy is recommended at the implementation phase to mitigate CWE-37?**Options:** A) Encrypting data at rest B) Deploying access control lists (ACL) C) Running services with least privilege D) Input validation  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  D C D


 17%|█▋        | 17/100 [00:47<03:47,  2.75s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 18%|█▊        | 18/100 [00:49<03:35,  2.63s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In the context of CWE-150, what is the primary security impact mentioned?**Options:** A) Confidentiality B) Availability C) Integrity D) Authenticity  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A C


 19%|█▉        | 19/100 [00:52<03:35,  2.66s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 20%|██        | 20/100 [00:55<03:34,  2.68s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 21%|██        | 21/100 [00:58<03:32,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** According to the Procedure Examples for MITRE ATT&CK technique T1584.003, which threat group has been reported to use compromised VPS infrastructure from Iranian threat actors?**Options:** A) Turla B) APT29 C) Lazarus Group D) Carbanak  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A A


 22%|██▏       | 22/100 [01:00<03:33,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 23%|██▎       | 23/100 [01:03<03:29,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 24%|██▍       | 24/100 [01:06<03:25,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 25%|██▌       | 25/100 [01:08<03:21,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 26%|██▌       | 26/100 [01:11<03:19,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 27%|██▋       | 27/100 [01:14<03:19,  2.73s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 28%|██▊       | 28/100 [01:17<03:16,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 29%|██▉       | 29/100 [01:19<03:12,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 30%|███       | 30/100 [01:22<0

You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which mitigation (ID M0953) is suggested to manage the risk of data compromise and enable quick recovery?**Options:** A) Limit file extensions B) Implement network segmentation C) Store data backups separately D) Implement two-factor authentication  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C C


 31%|███       | 31/100 [01:25<03:05,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 32%|███▏      | 32/100 [01:28<03:24,  3.00s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 33%|███▎      | 33/100 [01:31<03:14,  2.91s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 34%|███▍      | 34/100 [01:34<03:06,  2.83s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 35%|███▌      | 35/100 [01:36<02:54,  2.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which category of cyber-attack consequences includes the impact of 'Execute Unauthorized Commands'?**Options:** A) Availability B) Confidentiality C) Confidentiality Integrity Availability D) Integrity  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C D C


 36%|███▌      | 36/100 [01:39<02:54,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which mitigation strategy best helps prevent the attack pattern described in CAPEC-549: Local Execution of Code?**Options:** A) Implementing a multi-factor authentication protocol B) Employing robust cybersecurity training for all employees C) Using intrusion detection systems D) Regularly changing all the passwords to the system  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C B B


 37%|███▋      | 37/100 [01:42<02:53,  2.75s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 38%|███▊      | 38/100 [01:44<02:48,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 39%|███▉      | 39/100 [01:47<02:45,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 40%|████      | 40/100 [01:50<02:42,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 41%|████      | 41/100 [01:53<02:41,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 42%|████▏     | 42/100 [01:55<02:32,  2.64s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which CWE is NOT related to CAPEC-21 exploitation techniques?**Options:** A) CWE-290 B) CWE-523 C) CWE-346 D) CWE-384  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A B


 43%|████▎     | 43/100 [01:58<02:30,  2.64s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 44%|████▍     | 44/100 [02:00<02:29,  2.67s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 45%|████▌     | 45/100 [02:03<02:26,  2.67s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** The product behavior described in CWE-203 can lead to a compromise of which scope primarily?**Options:** A) Availability B) Integrity C) Confidentiality D) Non-repudiation  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A C


 46%|████▌     | 46/100 [02:06<02:26,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 47%|████▋     | 47/100 [02:09<02:23,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 48%|████▊     | 48/100 [02:11<02:20,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 49%|████▉     | 49/100 [02:15<02:30,  2.96s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 50%|█████     | 50/100 [02:18<02:25,  2.91s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 51%|█████     | 51/100 [02:20<02:21,  2.89s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 52%|█████▏    | 52/100 [02:23<02:15,  2.83s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** How does the Dok adversary use AppleScript according to the provided document? (MITRE ATT&CK: T1059.002, Platform: None)**Options:** A) To send keystrokes to the Finder application B) To interact with SSH connections C) To create a login item for persistence D) To execute a reverse shell via Python  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C A C


 53%|█████▎    | 53/100 [02:26<02:11,  2.79s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 54%|█████▍    | 54/100 [02:29<02:07,  2.77s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 55%|█████▌    | 55/100 [02:31<02:04,  2.76s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 56%|█████▌    | 56/100 [02:34<02:02,  2.77s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 57%|█████▋    | 57/100 [02:37<01:58,  2.75s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which CWE ID is associated with CAPEC-439?**Options:** A) CWE-89: SQL Injection B) CWE-1269: Product Released in Non-Release Configuration C) CWE-79: Cross-Site Scripting (XSS) D) CWE-22: Path Traversal  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C A B


 58%|█████▊    | 58/100 [02:39<01:55,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 59%|█████▉    | 59/100 [02:43<02:02,  2.98s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 60%|██████    | 60/100 [02:46<01:57,  2.95s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 61%|██████    | 61/100 [02:49<01:52,  2.88s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In the context of MITRE ATT&CK (Mobile), which mitigation strategy can prevent the installation of specific remote access applications on managed devices?**Options:** A) M1011 - User Guidance B) M1012 - Enterprise Policy C) DS0042 - User Interface D) M1010 - Software Configuration Settings  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B D B


 62%|██████▏   | 62/100 [02:51<01:47,  2.83s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 63%|██████▎   | 63/100 [02:58<02:23,  3.87s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 64%|██████▍   | 64/100 [03:01<02:09,  3.59s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 65%|██████▌   | 65/100 [03:04<02:04,  3.56s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 66%|██████▌   | 66/100 [03:06<01:49,  3.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which phase can introduce CWE-669 due to improper implementation of an architectural security tactic?**Options:** A) Architecture and Design B) Implementation C) Operation D) Testing  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A B


 67%|██████▋   | 67/100 [03:09<01:40,  3.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 68%|██████▊   | 68/100 [03:12<01:34,  2.96s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 69%|██████▉   | 69/100 [03:15<01:30,  2.92s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 70%|███████   | 70/100 [03:17<01:25,  2.86s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 71%|███████   | 71/100 [03:20<01:21,  2.81s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 72%|███████▏  | 72/100 [03:23<01:17,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which procedure uses msiexec.exe to disable security tools on the system? (MITRE ATT&CK: System Binary Proxy Execution: Msiexec - T1218.007)**Options:** A) AppleJeus B) Chaes C) Clop D) DEADEYE  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C B C


 73%|███████▎  | 73/100 [03:26<01:14,  2.77s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 74%|███████▍  | 74/100 [03:28<01:12,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 75%|███████▌  | 75/100 [03:31<01:06,  2.65s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 76%|███████▌  | 76/100 [03:33<01:03,  2.66s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which adversary group used built-in net commands to enumerate domain administrator users as per the examples provided?**Options:** A) BRONZE BUTLER B) APT41 C) menuPass D) Dragonfly  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C B


 77%|███████▋  | 77/100 [03:37<01:04,  2.82s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 78%|███████▊  | 78/100 [03:39<01:01,  2.81s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In the context of MITRE ATT&CK for ICS, which platform and tactic is associated with T0873, Project File Infection?**Options:** A) ICS, Execution B) ICS, Persistence C) Enterprise, Persistence D) Mobile, Execution  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B A B


 79%|███████▉  | 79/100 [03:42<00:58,  2.80s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In the LATACH G framework, which group has been attributed to the use of ransomware from a batch file in a network share?**Options:** A) BRONZE BUTLER B) Cinnamon Tempest C) Ursnif D) Ramsay  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  C B B


 80%|████████  | 80/100 [03:45<00:55,  2.76s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which related attack pattern involves forcing the use of corrupted files?**Options:** A) CAPEC-552 B) CAPEC-263 C) CAPEC-175 D) CAPEC-640  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C B


 81%|████████  | 81/100 [03:48<00:52,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 82%|████████▏ | 82/100 [03:50<00:49,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 83%|████████▎ | 83/100 [03:53<00:47,  2.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 84%|████████▍ | 84/100 [03:56<00:43,  2.75s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 85%|████████▌ | 85/100 [03:58<00:41,  2.73s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which process creation is an indicator of potential SYSTEM privilege escalation according to the detection section?**Options:** A) C:\Windows\System32\services.exe B) C:\Windows\System32\cmd.exe C) C:\Windows\System32\rundll32.exe D) C:\Windows\System32\notepad.exe  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C B


 86%|████████▌ | 86/100 [04:01<00:38,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 87%|████████▋ | 87/100 [04:04<00:35,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 88%|████████▊ | 88/100 [04:07<00:32,  2.73s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 89%|████████▉ | 89/100 [04:09<00:29,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is CWE-1233 primarily associated with in terms of impact?**Options:** A) Technical Impact: Data Exposure B) Technical Impact: Information Disclosure C) Technical Impact: Modify Memory D) Technical Impact: Execution Flow Attacks  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B D C


 90%|█████████ | 90/100 [04:12<00:26,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is the primary concern of CWE-215?**Options:** A) Violation of integrity B) Data exfiltration C) Exposure of sensitive information D) Denial of service  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  A C C


 91%|█████████ | 91/100 [04:15<00:24,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 92%|█████████▏| 92/100 [04:17<00:21,  2.70s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** In terms of incident analysis, which method can help in understanding the normal behavior of networks, systems, and applications?**Options:** A) Running antivirus software regularly B) Creating detailed user activity reports C) Performing regular backups D) Reviewing log entries and security alerts  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B D D


 93%|█████████▎| 93/100 [04:20<00:19,  2.75s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which of the following procedures involved the execution of a Portable Executable (PE) using cmd.exe as seen in MITRE ATT&CK technique T1059.003?**Options:** A) 4H RAT B) ABK C) AUDITCRED D) COBALT STRIKE  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  D C B


 94%|█████████▍| 94/100 [04:23<00:16,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 95%|█████████▌| 95/100 [04:26<00:13,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which data source and data component combination is recommended for monitoring AppleScript execution through osascript? (MITRE ATT&CK: T1059.002, Platform: None)**Options:** A) Network Traffic; Network Connection Creation B) Command; Command Execution C) Process; OS API Execution D) File; File Modification  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C B


 96%|█████████▌| 96/100 [04:28<00:10,  2.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** What is the primary security risk associated with CWE-1253?**Options:** A) Privilege escalation due to an unblown fuse B) Denial of service due to memory read vulnerability C) Exploitable insecure state due to a blown fuse D) Inability to perform remote code execution  **Important:** The answer should contain only the single letter corresponding to the best option, with no additional text.  B C D


 97%|█████████▋| 97/100 [04:31<00:08,  2.72s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 98%|█████████▊| 98/100 [04:34<00:05,  2.76s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 99%|█████████▉| 99/100 [04:37<00:02,  2.74s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
100%|██████████| 100/100 [04:39<00:00,  2.80s/it]

69 % of correct answers for Llama 3 8B





# cti-rcm

In [None]:
# Read the cti-rcm task table and the table of recorded model responses.
# Both files are tab-separated; they are read in the order listed.
cti_rcm, cti_rcm_responses = (
    pd.read_csv(tsv_name, sep='\t')
    for tsv_name in ('cti-rcm.tsv', 'cti-rcm-responses.tsv')
)

## Gemini-1.5

In [None]:
# Spot-check of the recorded Gemini answers for cti-rcm: re-query the model on
# randomly drawn rows and compare each fresh answer with the value stored in
# the "Gemini-1.5" column of cti_rcm_responses.
gemini_key = userdata.get('gemini')

genai.configure(api_key=gemini_key)

model = genai.GenerativeModel('gemini-1.5-pro')

# Recorded Gemini answers; looked up with the same row index as cti_rcm.
gemini_responses = cti_rcm_responses["Gemini-1.5"]

# Number of rows to re-query.
nb_samples = 40
# random.randint is inclusive at both ends, so a hard-coded upper bound can
# select a row past the end of the table. Derive the bound from the data.
nb_rows = min(len(cti_rcm), len(gemini_responses))

nb = 0       # rows for which a usable response was obtained
correct = 0  # rows whose fresh answer equals the recorded one
for i in tqdm(range(nb_samples)):
    row = random.randrange(nb_rows)

    prompt = "Analyze the following CVE description and map it to the appropriate CWE. Ensure that your response contains only the CWE ID with no additional text. CVE Description: "+cti_rcm["Prompt"][row]

    try:
        # Both the request and the `.text` accessor are inside the try so that
        # one failed call is reported and skipped instead of aborting the run.
        response = model.generate_content(prompt, safety_settings="BLOCK_NONE")
        answer = response.text.strip()
    except Exception as e:
        # Failed rows are not counted in `nb`, so they do not affect the score.
        print(e)
    else:
        if answer == gemini_responses[row]:
            correct += 1
        else:
            # Mismatch: show the prompt, fresh answer, recorded answer and ground truth.
            print(cti_rcm["Prompt"][row], answer, gemini_responses[row], cti_rcm["GT"][row], "bad")
        nb += 1

    # Pause between requests (presumably to stay under the API rate limit).
    time.sleep(60)

if nb:
    print(correct/nb * 100, "% correct answers for gemini")
else:
    # Every request failed; avoid dividing by zero.
    print("No Gemini response could be evaluated")

 15%|█▌        | 6/40 [06:30<36:44, 64.82s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: A vulnerability was found in Tenda W9 1.0.0.7(4456). It has been declared as critical. Affected by this vulnerability is the function formAddSysLogRule of the component httpd. The manipulation of the argument sysRulenEn leads to stack-based buffer overflow. The attack can be launched remotely. The exploit has been disclosed to the public and may be used. The associated identifier of this vulnerability is VDB-250711. NOTE: The vendor was contacted early about this disclosure but did not respond in any way.  CWE-787 CWE-121 CWE-787 bad


 20%|██        | 8/40 [08:36<34:10, 64.09s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: IBM Security Access Manager Container (IBM Security Verify Access Appliance 10.0.0.0 through 10.0.6.1 and IBM Security Verify Access Docker 10.0.0.0 through 10.0.6.1) could allow a remote attacker to gain access to the underlying system using man in the middle techniques.  IBM X-Force ID:  254765.  CWE-330 CWE-346 CWE-300 bad


 22%|██▎       | 9/40 [09:39<32:57, 63.78s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: Sysmac Studio installs executables in a directory with poor permissions. This can allow a locally-authenticated attacker to overwrite files which will result in code execution with privileges of a different user.  CWE-732 CWE-269 CWE-276 bad


 28%|██▊       | 11/40 [11:46<30:38, 63.39s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: The vulnerability allows an unauthenticated remote attacker to perform a Denial-of-Service (DoS) attack or, possibly, obtain Remote Code Execution (RCE) via a crafted network request.  CWE-400 CWE-20 CWE-787 bad


 38%|███▊      | 15/40 [15:56<26:11, 62.85s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: Multiple memory corruption issues were addressed with improved memory handling. This issue is fixed in macOS Sonoma 14.3, iOS 16.7.5 and iPadOS 16.7.5, iOS 17.3 and iPadOS 17.3. Processing maliciously crafted web content may lead to arbitrary code execution.  CWE-787 CWE-120 CWE-787 bad


 40%|████      | 16/40 [16:59<25:09, 62.90s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: NULL Pointer Dereference vulnerability in Linux Linux kernel kernel on Linux, x86, ARM (net, bluetooth modules) allows Overflow Buffers. This vulnerability is associated with program files /net/bluetooth/rfcomm/core.C.  This issue affects Linux kernel: v2.6.12-rc2.  CWE-476 Error CWE-476 bad


 52%|█████▎    | 21/40 [22:12<19:48, 62.55s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: Statamic is a Laravel and Git powered CMS. HTML files crafted to look like jpg files are able to be uploaded, allowing for XSS. This affects the front-end forms with asset fields without any mime type validation, asset fields in the control panel, and asset browser in the control panel. Additionally, if the XSS is crafted in a specific way, the "copy password reset link" feature may be exploited to gain access to a user's password reset token and gain access to their account. The authorized user is required to execute the XSS in order for the vulnerability to occur. In versions 4.46.0 and 3.4.17, the XSS vulnerability has been patched, and the copy password reset link functionality has been disabled.  CWE-79 CWE-434 CWE-79 bad


 55%|█████▌    | 22/40 [23:15<18:49, 62.75s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: A vulnerability, which was classified as critical, has been found in 个人开源 mldong 1.0. This issue affects the function ExpressionEngine of the file com/mldong/modules/wf/engine/model/DecisionModel.java. The manipulation leads to code injection. The attack may be initiated remotely. The exploit has been disclosed to the public and may be used. The identifier VDB-251561 was assigned to this vulnerability.  CWE-94 CWE-77 CWE-94 bad


 57%|█████▊    | 23/40 [24:18<17:49, 62.88s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: In the Linux kernel through 6.7.1, there is a use-after-free in cec_queue_msg_fh, related to drivers/media/cec/core/cec-adap.c and drivers/media/cec/core/cec-api.c.  CWE416 CWE-416 CWE-416 bad


 72%|███████▎  | 29/40 [30:35<11:29, 62.69s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: An Exposure of Resource to Wrong Sphere vulnerability in the Packet Forwarding Engine (PFE) of Juniper Networks Junos OS on MX Series allows an unauthenticated, network-based attacker to bypass the intended access restrictions.  In an Abstracted Fabric (AF) scenario if routing-instances (RI) are configured, specific valid traffic destined to the device can bypass the configured lo0 firewall filters as it's received in the wrong RI context.  This issue affects Juniper Networks Junos OS on MX Series:      *  All versions earlier than 20.4R3-S9;   *  21.2 versions earlier than 21.2R3-S3;   *  21.4 versions earlier than 21.4R3-S5;   *  22.1 versions earlier than 22.1R3;   *  22.2 versions earlier than 22.2R3;   *  22.3 versions earlier than 22.3R2.  CWE-862 CWE-200 CWE-668 bad


 85%|████████▌ | 34/40 [35:49<06:16, 62.70s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: Lif Auth Server is a server for validating logins, managing information, and account recovery for Lif Accounts. The issue relates to the `get_pfp` and `get_banner` routes on Auth Server. The issue is that there is no check to ensure that the file that Auth Server is receiving through these URLs is correct. This could allow an attacker access to files they shouldn't have access to. This issue has been patched in version 1.4.0.  CWE-22 CWE-2 CWE-22 bad


 88%|████████▊ | 35/40 [36:51<05:12, 62.60s/it]

Analyze the following CVE description and map it to the appropriate CWE. Provide a brief justification for your choice. Ensure the last line of your response contains only the CWE ID.  CVE Description: A vulnerability was found in Totolink LR1200GB 9.1.0u.6619_B20230130. It has been classified as critical. This affects the function setTracerouteCfg of the file /cgi-bin/cstecgi.cgi. The manipulation of the argument command leads to stack-based buffer overflow. It is possible to initiate the attack remotely. The exploit has been disclosed to the public and may be used. The associated identifier of this vulnerability is VDB-250791. NOTE: The vendor was contacted early about this disclosure but did not respond in any way.  CWE-787 CWE-121 CWE-121 bad


100%|██████████| 40/40 [42:06<00:00, 63.17s/it]

70.0 % correct answers for gemini





## LLAMA3-8B

In [None]:
# Spot-check of the recorded LLAMA3-8B answers for cti-rcm: regenerate answers
# for randomly drawn rows and compare them with the "LLAMA3-8B" column of
# cti_rcm_responses. `pipeline` is expected to be defined by an earlier cell
# (not visible here).
import re

# The model sometimes repeats the ID on a second line or adds surrounding
# whitespace; compare on the first CWE identifier found in the generation.
cwe_pattern = re.compile(r"CWE-\d+")

# Number of rows to re-query.
nb_samples = 100
# random.randint is inclusive at both ends, so a hard-coded upper bound can
# select a row past the end of the table. Derive the bound from the data.
nb_rows = min(len(cti_rcm), len(cti_rcm_responses))

# Stop tokens do not depend on the row, so build the list once.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

correct = 0

for i in tqdm(range(nb_samples)):
    row = random.randrange(nb_rows)

    system = "Analyze the following CVE description and map it to the appropriate CWE. **important** the response must contains only the CWE ID with no additional text. CVE Description: "+cti_rcm["Prompt"][row]+" you return only the CWE."

    # The whole instruction is sent as a single system message.
    messages = [
        {"role": "system", "content": system},
    ]

    outputs = pipeline(
        messages,
        max_new_tokens=10,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.1,
        top_p=0.9,
    )

    # The last message of the returned conversation holds the generated text.
    answer = outputs[0]["generated_text"][-1]["content"].strip()
    found = cwe_pattern.search(answer)
    if found:
        answer = found.group(0)

    if answer == cti_rcm_responses["LLAMA3-8B"][row]:
        correct += 1
    else:
        # Mismatch: show the fresh answer, recorded answer and ground truth.
        print(answer, cti_rcm_responses["LLAMA3-8B"][row], cti_rcm_responses["GT"][row])


# Report a real percentage so the figure stays right if nb_samples changes.
print(correct / nb_samples * 100, "% of correct answers for Llama 3 8B")


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  1%|          | 1/100 [00:05<09:19,  5.65s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  2%|▏         | 2/100 [00:10<08:53,  5.45s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-80 CWE-79 CWE-79


  3%|▎         | 3/100 [00:16<08:36,  5.33s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  4%|▍         | 4/100 [00:21<08:38,  5.40s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  5%|▌         | 5/100 [00:31<10:58,  6.93s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-122 CWE-119 CWE-787


  6%|▌         | 6/100 [00:36<09:43,  6.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  7%|▋         | 7/100 [00:41<09:07,  5.89s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  8%|▊         | 8/100 [00:46<08:38,  5.63s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  9%|▉         | 9/100 [00:51<08:15,  5.44s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 10%|█         | 10/100 [00:56<08:10,  5.45s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 11%|█         | 11/100 [01:02<07:55,  5.34s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 12%|█▏        | 12/100 [01:07<07:56,  5.41s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 13%|█▎        | 13/100 [01:12<07:42,  5.32s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 14%|█▍        | 14/100 [01:18<07:37

CWE-121 CWE-119 CWE-787


 22%|██▏       | 22/100 [02:00<06:50,  5.26s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 23%|██▎       | 23/100 [02:05<06:41,  5.22s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 24%|██▍       | 24/100 [02:10<06:38,  5.24s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 25%|██▌       | 25/100 [02:19<08:00,  6.41s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


The vulnerability is a Cross-Site Request Forgery CWE-352 CWE-352


 26%|██▌       | 26/100 [02:25<07:31,  6.10s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 27%|██▋       | 27/100 [02:30<07:03,  5.79s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-787 CWE-120 CWE-787


 28%|██▊       | 28/100 [02:35<06:42,  5.59s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 29%|██▉       | 29/100 [02:40<06:28,  5.48s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 30%|███       | 30/100 [02:45<06:15,  5.36s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 31%|███       | 31/100 [02:51<06:18,  5.49s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 32%|███▏      | 32/100 [02:56<06:04,  5.37s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-120 CWE-190 CWE-190


 33%|███▎      | 33/100 [03:01<05:47,  5.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 34%|███▍      | 34/100 [03:06<05:44,  5.22s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-787 CWE-120 CWE-787


 35%|███▌      | 35/100 [03:11<05:30,  5.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 36%|███▌      | 36/100 [03:16<05:28,  5.13s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 37%|███▋      | 37/100 [03:21<05:17,  5.04s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 38%|███▊      | 38/100 [03:26<05:13,  5.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 39%|███▉      | 39/100 [03:31<05:08,  5.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-502 CWE-20 CWE-502


 40%|████      | 40/100 [03:36<05:04,  5.07s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 41%|████      | 41/100 [03:41<04:59,  5.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-284 CWE-310 CWE-319


 42%|████▏     | 42/100 [03:47<04:59,  5.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 43%|████▎     | 43/100 [03:51<04:47,  5.05s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-121 CWE-120 CWE-787


 44%|████▍     | 44/100 [03:56<04:40,  5.01s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 45%|████▌     | 45/100 [04:01<04:36,  5.04s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 46%|████▌     | 46/100 [04:07<04:38,  5.16s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 47%|████▋     | 47/100 [04:12<04:36,  5.22s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 48%|████▊     | 48/100 [04:17<04:29,  5.19s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 49%|████▉     | 49/100 [04:23<04:24,  5.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 50%|█████     | 50/100 [04:27<04:14,  5.09s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 51%|█████     | 51/100 [04:33<04:09,  5.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-20 CWE-352 CWE-862


 52%|█████▏    | 52/100 [04:38<04:02,  5.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 53%|█████▎    | 53/100 [04:43<03:58,  5.07s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 54%|█████▍    | 54/100 [04:48<03:53,  5.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 55%|█████▌    | 55/100 [04:53<03:47,  5.05s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 56%|█████▌    | 56/100 [04:58<03:42,  5.07s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 57%|█████▋    | 57/100 [05:03<03:35,  5.02s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-122 CWE-119 CWE-787


 58%|█████▊    | 58/100 [05:08<03:36,  5.15s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-120 CWE-119 CWE-787


 59%|█████▉    | 59/100 [05:13<03:30,  5.13s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 60%|██████    | 60/100 [05:19<03:28,  5.20s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 61%|██████    | 61/100 [05:24<03:21,  5.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 62%|██████▏   | 62/100 [05:29<03:21,  5.30s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 63%|██████▎   | 63/100 [05:34<03:14,  5.25s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 64%|██████▍   | 64/100 [05:40<03:07,  5.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-122 CWE-119 CWE-787


 65%|██████▌   | 65/100 [05:45<03:04,  5.27s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 66%|██████▌   | 66/100 [05:50<03:00,  5.31s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 67%|██████▋   | 67/100 [05:56<02:54,  5.28s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 68%|██████▊   | 68/100 [06:01<02:50,  5.34s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 69%|██████▉   | 69/100 [06:06<02:42,  5.25s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 70%|███████   | 70/100 [06:11<02:38,  5.28s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 71%|███████   | 71/100 [06:17<02:35,  5.35s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-79 CWE-80 CWE-79


 72%|███████▏  | 72/100 [06:22<02:26,  5.23s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 73%|███████▎  | 73/100 [06:27<02:20,  5.20s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 74%|███████▍  | 74/100 [06:32<02:11,  5.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 75%|███████▌  | 75/100 [06:37<02:08,  5.15s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 76%|███████▌  | 76/100 [06:42<02:03,  5.13s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 77%|███████▋  | 77/100 [06:48<02:00,  5.25s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-121 CWE-120 CWE-121


 78%|███████▊  | 78/100 [06:53<01:55,  5.23s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 79%|███████▉  | 79/100 [06:58<01:46,  5.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 80%|████████  | 80/100 [07:03<01:41,  5.06s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-122 CWE-119 CWE-190


 81%|████████  | 81/100 [07:07<01:34,  4.96s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 82%|████████▏ | 82/100 [07:13<01:30,  5.01s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 83%|████████▎ | 83/100 [07:18<01:27,  5.12s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 84%|████████▍ | 84/100 [07:23<01:21,  5.12s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 85%|████████▌ | 85/100 [07:29<01:19,  5.29s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 86%|████████▌ | 86/100 [07:38<01:29,  6.42s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-287

CWE-287 CWE-287 CWE-639


 87%|████████▋ | 87/100 [07:43<01:19,  6.12s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 88%|████████▊ | 88/100 [07:48<01:09,  5.80s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 89%|████████▉ | 89/100 [07:53<01:01,  5.61s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 90%|█████████ | 90/100 [07:59<00:54,  5.50s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 91%|█████████ | 91/100 [08:04<00:48,  5.37s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 92%|█████████▏| 92/100 [08:09<00:42,  5.34s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-822 CWE-416 CWE-416


 93%|█████████▎| 93/100 [08:14<00:36,  5.16s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 94%|█████████▍| 94/100 [08:18<00:30,  5.03s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-601 CWE-918 CWE-601


 95%|█████████▌| 95/100 [08:24<00:25,  5.14s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 96%|█████████▌| 96/100 [08:29<00:20,  5.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 97%|█████████▋| 97/100 [08:34<00:15,  5.12s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 98%|█████████▊| 98/100 [08:39<00:10,  5.04s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CWE-312 CWE-319 CWE-319


 99%|█████████▉| 99/100 [08:44<00:04,  4.95s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
100%|██████████| 100/100 [08:49<00:00,  5.30s/it]

79 % of correct answers for Llama 3 8B





# cti-vsp

In [None]:
# Read the cti-vsp task table and the table of recorded model responses.
# Both files are tab-separated; they are read in the order listed.
cti_vsp, cti_vsp_responses = (
    pd.read_csv(tsv_name, sep='\t')
    for tsv_name in ('cti-vsp.tsv', 'cti-vsp-responses.tsv')
)

## Gemini-1.5

In [None]:
# Spot-check of the recorded Gemini answers for cti-vsp: re-query the model on
# randomly drawn rows and compare the CVSS vector it returns with the value
# stored in the "Gemini-1.5" column of cti_vsp_responses.
gemini_key = userdata.get('gemini')

genai.configure(api_key=gemini_key)

model = genai.GenerativeModel('gemini-1.5-pro')

# Number of rows to re-query.
nb_samples = 40
# Derive the row bound from the data instead of hard-coding it.
nb_rows = min(len(cti_vsp), len(cti_vsp_responses))

nb = 0       # rows for which a usable response was obtained
correct = 0  # rows whose fresh answer equals the recorded one

for i in tqdm(range(nb_samples)):
    line = random.randrange(nb_rows)

    prompt = cti_vsp["Prompt"][line]
    # The recorded answers are stored without the "CVSS:3.1/" prefix.
    expected = "CVSS:3.1/"+cti_vsp_responses["Gemini-1.5"][line]

    try:
        # Both the request and the `.text` accessor are inside the try so that
        # one failed call is reported and skipped instead of aborting the run.
        response = model.generate_content(prompt, safety_settings="BLOCK_NONE")

        # Keep the last line of the reply, without markdown bold markers.
        answer = response.text.strip().split('\n')[-1].strip('*')

        # If that line still contains several words, keep only the last one.
        if " " in answer:
            answer = answer.split()[-1]
    except Exception as e:
        # Failed rows are not counted in `nb`, so they do not affect the score.
        print(e)
    else:
        nb += 1
        if answer == expected:
            correct += 1
        else:
            # Mismatch: show the fresh answer next to the recorded one.
            print(answer, expected)

    # Pause between requests (presumably to stay under the API rate limit).
    time.sleep(60)

if nb:
    print((correct/nb) *100, "% correct answers for gemini")
else:
    # Every request failed; avoid dividing by zero.
    print("No Gemini response could be evaluated")


  8%|▊         | 3/40 [03:28<42:53, 69.56s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N


 12%|█▎        | 5/40 [05:45<40:19, 69.12s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H


 15%|█▌        | 6/40 [06:56<39:22, 69.50s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N


 22%|██▎       | 9/40 [10:21<35:36, 68.92s/it]

CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:L/AC:L/PR:H/UI:N/S:U/C:N/I:N/A:H


 30%|███       | 12/40 [13:53<32:35, 69.86s/it]

analyze! CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 40%|████      | 16/40 [18:37<28:14, 70.62s/it]

CVSS:3.1/AV:L/AC:L/PR:N/UI:R/S:U/C:N/I:N/A:L CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H


 42%|████▎     | 17/40 [19:47<26:55, 70.22s/it]

CVSS:3.1/AV:A/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 48%|████▊     | 19/40 [22:05<24:25, 69.76s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:H/I:H/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N


 50%|█████     | 20/40 [23:15<23:15, 69.76s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:L


 55%|█████▌    | 22/40 [25:35<20:55, 69.77s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:H


 68%|██████▊   | 27/40 [31:17<14:58, 69.08s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:L/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N


 70%|███████   | 28/40 [32:27<13:50, 69.19s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H


 72%|███████▎  | 29/40 [33:40<12:54, 70.37s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 78%|███████▊  | 31/40 [36:02<10:37, 70.89s/it]

CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 82%|████████▎ | 33/40 [38:21<08:09, 69.97s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:L/I:L/A:N


 92%|█████████▎| 37/40 [43:03<03:31, 70.50s/it]

CVSS:3.1/AV:N/AC:L/PR:H/UI:R/S:U/C:L/I:L/A:N CVSS:3.1/AV:N/AC:L/PR:H/UI:R/S:U/C:H/I:H/A:H


100%|██████████| 40/40 [46:33<00:00, 69.83s/it]

60.0 % correct answers for gemini





## LLAMA3-8B

In [None]:
# Reproducibility check for Llama 3 8B on CTI-VSP: prompt the already-loaded
# `pipeline` with randomly sampled CVE descriptions and compare the CVSS vector
# it generates with the response recorded in cti_vsp_responses["LLAMA3-8B"].
# Answers that do not contain "CVSS:3.1/" are left out of the score.
import re

N_SAMPLES = 100  # number of descriptions to query

# A complete CVSS v3.1 base vector, e.g.
# CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
CVSS_VECTOR_RE = re.compile(
    r"CVSS:3\.1/AV:[NALP]/AC:[LH]/PR:[NLH]/UI:[NR]/S:[UC]/C:[NLH]/I:[NLH]/A:[NLH]"
)

# Fixed text placed before and after the CVE description; built once because it
# does not change between iterations.
PROMPT_HEAD = """- Analyze the following CVE description.
- Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A.
- Provide the final CVSS v3.1 vector string. Valid options for each metric are as follows: - **Attack Vector (AV)**: Network (N), Adjacent (A), Local (L), Physical (P) - **Attack Complexity (AC)**: Low (L), High (H) - **Privileges Required (PR)**: None (N), Low (L), High (H) - **User Interaction (UI)**: None (N), Required (R) - **Scope (S)**: Unchanged (U), Changed (C) - **Confidentiality (C)**: None (N), Low (L), High (H) - **Integrity (I)**: None (N), Low (L), High (H) - **Availability (A)**: None (N), Low (L), High (H).
- Provide the final CVSS v3.1 vector string.
- Your response must contain only the CVSS v3 Vector String in the following format:  Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
- CVE Description:"""
PROMPT_TAIL = """
return only the CVSS v3 Vector String.
the CVSS v3 Vector String:"""

# Stop generation on either the tokenizer's EOS token or the "<|eot_id|>" token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

# Sample only from rows present in both tables instead of a hard-coded 0..999.
n_rows = min(len(cti_vsp), len(cti_vsp_responses))

counter = 0  # answers that contained a "CVSS:3.1/" prefix
correct = 0  # of those, answers equal to the recorded response

for i in tqdm(range(N_SAMPLES)):
  line = random.randrange(n_rows)

  system = PROMPT_HEAD + cti_vsp["Description"][line] + PROMPT_TAIL

  # NOTE(review): the whole prompt is sent as a single system message with no
  # user turn; kept as-is so generations stay comparable with earlier runs.
  messages = [
      {"role": "system", "content": system},
  ]

  outputs = pipeline(
      messages,
      max_new_tokens=50,
      eos_token_id=terminators,
      do_sample=True,
      temperature=0.1,
      top_p=0.9,
  )

  answer = outputs[0]["generated_text"][-1]["content"]
  if "CVSS:3.1/" not in answer:
    continue

  counter += 1

  # Compare the vector itself rather than the raw generation, so surrounding
  # whitespace or extra tokens do not turn a matching answer into a mismatch.
  found = CVSS_VECTOR_RE.search(answer)
  predicted = found.group(0) if found else answer.strip()
  expected = "CVSS:3.1/"+cti_vsp_responses["LLAMA3-8B"][line]

  if predicted == expected:
    correct += 1
  else:
    print(predicted, expected)

# The percentage below is relative to parseable answers only, so report how many
# of the sampled rows that covers.
print(counter, "of", N_SAMPLES, "answers contained a CVSS:3.1 vector")

if counter:
  print((correct/counter)*100, "% of correct answers for Llama 3 8B")
else:
  print("no Llama 3 8B answer contained a CVSS:3.1 vector")

  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  1%|          | 1/100 [00:27<45:01, 27.29s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N


  2%|▏         | 2/100 [00:52<42:06, 25.78s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N


  3%|▎         | 3/100 [01:19<42:45, 26.45s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


  4%|▍         | 4/100 [01:43<41:10, 25.73s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  5%|▌         | 5/100 [02:07<39:23, 24.88s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  6%|▌         | 6/100 [02:31<38:32, 24.60s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  7%|▋         | 7/100 [02:55<38:06, 24.59s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


  8%|▊         | 8/100 [03:19<37:17, 24.32s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:A/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:H


  9%|▉         | 9/100 [03:44<36:55, 24.35s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:L/I:H/A:N


 10%|█         | 10/100 [04:07<36:16, 24.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:H


 11%|█         | 11/100 [04:32<35:54, 24.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 12%|█▏        | 12/100 [04:57<35:57, 24.52s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 13%|█▎        | 13/100 [05:21<35:12, 24.29s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 14%|█▍        | 14/100 [05:46<35:07, 24.51s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 15%|█▌        | 15/100 [06:10<34:43, 24.52s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:H


 16%|█▌        | 16/100 [06:34<33:59, 24.28s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:H


 17%|█▋        | 17/100 [06:58<33:26, 24.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 18%|█▊        | 18/100 [07:22<33:02, 24.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 19%|█▉        | 19/100 [07:46<32:40, 24.20s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 20%|██        | 20/100 [08:11<32:35, 24.44s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:N


 21%|██        | 21/100 [08:36<32:29, 24.68s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 22%|██▏       | 22/100 [09:01<31:54, 24.54s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 23%|██▎       | 23/100 [09:44<38:49, 30.26s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 24%|██▍       | 24/100 [10:09<36:07, 28.51s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 25%|██▌       | 25/100 [10:52<41:04, 32.86s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 26%|██▌       | 26/100 [11:16<37:20, 30.28s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:N


 27%|██▋       | 27/100 [11:41<35:01, 28.79s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 28%|██▊       | 28/100 [12:05<32:51, 27.38s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 29%|██▉       | 29/100 [12:49<37:59, 32.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 30%|███       | 30/100 [13:14<35:01, 30.02s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 31%|███       | 31/100 [13:38<32:34, 28.33s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 32%|███▏      | 32/100 [14:03<30:58, 27.32s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 33%|███▎      | 33/100 [14:26<29:12, 26.15s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 34%|███▍      | 34/100 [15:09<34:17, 31.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 35%|███▌      | 35/100 [15:34<31:32, 29.12s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H


 36%|███▌      | 36/100 [15:58<29:23, 27.55s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 37%|███▋      | 37/100 [16:21<27:41, 26.37s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:C/C:H/I:H/A:N


 38%|███▊      | 38/100 [16:45<26:20, 25.49s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 39%|███▉      | 39/100 [17:09<25:37, 25.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:N CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:N/I:H/A:N


 40%|████      | 40/100 [17:33<24:53, 24.89s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:L


 41%|████      | 41/100 [17:58<24:16, 24.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 42%|████▏     | 42/100 [18:22<23:44, 24.56s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:H


 43%|████▎     | 43/100 [18:45<23:03, 24.28s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:L/I:L/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 44%|████▍     | 44/100 [19:09<22:30, 24.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 45%|████▌     | 45/100 [19:35<22:27, 24.49s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:L/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 46%|████▌     | 46/100 [20:00<22:10, 24.63s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:N


 47%|████▋     | 47/100 [20:24<21:36, 24.46s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 48%|████▊     | 48/100 [20:48<21:03, 24.30s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 49%|████▉     | 49/100 [21:11<20:24, 24.01s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 50%|█████     | 50/100 [21:36<20:16, 24.33s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H


 51%|█████     | 51/100 [22:01<20:02, 24.55s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 52%|█████▏    | 52/100 [22:24<19:21, 24.21s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 53%|█████▎    | 53/100 [22:49<19:09, 24.45s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:L/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:L/A:N


 54%|█████▍    | 54/100 [23:32<22:59, 29.99s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 55%|█████▌    | 55/100 [23:57<21:13, 28.29s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 56%|█████▌    | 56/100 [24:22<20:03, 27.35s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 57%|█████▋    | 57/100 [24:47<19:08, 26.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 58%|█████▊    | 58/100 [25:12<18:22, 26.24s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 59%|█████▉    | 59/100 [25:36<17:27, 25.54s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 60%|██████    | 60/100 [26:00<16:44, 25.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:L


 61%|██████    | 61/100 [26:24<16:09, 24.86s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 62%|██████▏   | 62/100 [26:49<15:38, 24.71s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 63%|██████▎   | 63/100 [27:14<15:18, 24.84s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:N


 64%|██████▍   | 64/100 [27:39<14:52, 24.80s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:L


 65%|██████▌   | 65/100 [28:03<14:21, 24.60s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:N


 66%|██████▌   | 66/100 [28:27<13:50, 24.43s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 67%|██████▋   | 67/100 [28:52<13:32, 24.62s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:H/I:H/A:N


 68%|██████▊   | 68/100 [29:16<13:04, 24.50s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 69%|██████▉   | 69/100 [29:40<12:37, 24.44s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H CVSS:3.1/AV:L/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:H


 70%|███████   | 70/100 [30:04<12:05, 24.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 71%|███████   | 71/100 [30:29<11:46, 24.37s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:L


 72%|███████▏  | 72/100 [30:53<11:23, 24.40s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:H/A:N


 73%|███████▎  | 73/100 [31:36<13:29, 29.96s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 74%|███████▍  | 74/100 [32:01<12:21, 28.50s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 75%|███████▌  | 75/100 [32:25<11:19, 27.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 76%|███████▌  | 76/100 [32:50<10:31, 26.33s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 77%|███████▋  | 77/100 [33:13<09:46, 25.48s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:N


 78%|███████▊  | 78/100 [33:38<09:17, 25.33s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:N


 79%|███████▉  | 79/100 [34:02<08:42, 24.86s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 80%|████████  | 80/100 [34:26<08:13, 24.69s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 81%|████████  | 81/100 [34:51<07:47, 24.59s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:N


 82%|████████▏ | 82/100 [35:15<07:18, 24.39s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 83%|████████▎ | 83/100 [35:39<06:57, 24.55s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:L/I:L/A:N


 84%|████████▍ | 84/100 [36:04<06:30, 24.41s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 85%|████████▌ | 85/100 [36:27<06:02, 24.18s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 86%|████████▌ | 86/100 [36:51<05:38, 24.17s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 87%|████████▋ | 87/100 [37:15<05:12, 24.08s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:L/AC:L/PR:H/UI:N/S:U/C:N/I:N/A:H CVSS:3.1/AV:L/AC:L/PR:H/UI:N/S:U/C:N/I:H/A:H


 88%|████████▊ | 88/100 [37:39<04:48, 24.04s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:N


 89%|████████▉ | 89/100 [38:04<04:27, 24.30s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:N


 90%|█████████ | 90/100 [38:28<04:02, 24.27s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 91%|█████████ | 91/100 [38:53<03:40, 24.46s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:H/I:H/A:H


 92%|█████████▏| 92/100 [39:17<03:14, 24.34s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:H


 93%|█████████▎| 93/100 [40:00<03:28, 29.81s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 94%|█████████▍| 94/100 [40:42<03:21, 33.66s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 95%|█████████▌| 95/100 [41:08<02:35, 31.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:N/A:N CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:L/A:N


 96%|█████████▌| 96/100 [41:32<01:56, 29.11s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
 97%|█████████▋| 97/100 [41:56<01:22, 27.55s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


 98%|█████████▊| 98/100 [42:20<00:52, 26.37s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H


 99%|█████████▉| 99/100 [42:43<00:25, 25.63s/it]Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H


100%|██████████| 100/100 [43:07<00:00, 25.88s/it]

CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:H/A:H
22.82608695652174 % of correct answers for Llama 3 8B



