In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Adjust the file path to the TSV file in your Google Drive
file_path = '/content/drive/My Drive/cti-mcq.tsv'

# Read the TSV file
data = pd.read_csv(file_path, sep='\t')

# Load the tokenizer and model from Hugging Face
model_name = "mistralai/Mistral-7B-Instruct-v0.3"  # Update this with the correct Mistral model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Function to get the model response
def get_model_response(prompt, question, options):
    options_text = " ".join(options)
    input_text = f"{prompt}\n{question}\nOptions: {options_text}"
    inputs = tokenizer(input_text, return_tensors="pt").to('cuda')
    outputs = model.generate(**inputs, max_length=256)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Replace NaN values in options with empty strings
data.fillna({'Option C': '', 'Option D': ''}, inplace=True)

# Create a new column for responses
data['Response'] = data.apply(lambda row: get_model_response(row['Prompt'], row['Question'], [row['Option A'], row['Option B'], row['Option C'], row['Option D']]), axis=1)

# Display the first few rows of the updated DataFrame
print(data.head())


tokenizer_config.json:   0%|          | 0.00/138k [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
api_key = os.getenv('HF_TOKEN')

# Load the tokenizer and model from Hugging Face
model_name = "mistralai/Mistral-7B-Instruct-v0.3"  # Update this with the correct Mistral model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/138k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
df = pd.read_csv('/content/drive/MyDrive/cti-mcq.tsv', sep='\t')
df.head()

Unnamed: 0,URL,Question,Option A,Option B,Option C,Option D,Prompt,GT
0,https://attack.mitre.org/techniques/T1548/,Which of the following mitigations involves pr...,Audit,Execution Prevention,Operating System Configuration,User Account Control,You are given a multiple-choice question (MCQ)...,B
1,https://attack.mitre.org/techniques/T1548/,Which data source is recommended for monitorin...,Command,File,Process,User Account,You are given a multiple-choice question (MCQ)...,D
2,https://attack.mitre.org/techniques/T1548/,What does mitigation ID M1028 suggest to preve...,Limiting privileges of cloud accounts,Preventing unsigned applications from running,Minimizing applications with setuid or setgid ...,Enforcing the highest UAC level,You are given a multiple-choice question (MCQ)...,C
3,https://attack.mitre.org/techniques/T1548/,Which process creation is an indicator of pote...,C:\Windows\System32\services.exe,C:\Windows\System32\cmd.exe,C:\Windows\System32\rundll32.exe,C:\Windows\System32\notepad.exe,You are given a multiple-choice question (MCQ)...,B
4,https://attack.mitre.org/techniques/T1548/,"In a Linux environment, what is recommended to...",Monitor Windows Registry Key Modification,Monitor OS API Execution,Monitor file metadata for setuid or setgid bit...,Audit process metadata changes,You are given a multiple-choice question (MCQ)...,C


In [None]:
import os
import pandas as pd
import re

# Load the dataset from the TSV file
df = pd.read_csv('/content/drive/MyDrive/cti-mcq.tsv', delimiter='\t')
df = df.drop('GT', axis=1)

# Function to prepare the prompt for the model
def prepare_prompt(question, options, prompt):
    options_text = "\n".join([f"Option {chr(65+i)}: {opt}" for i, opt in enumerate(options)])
    formatted_prompt = f"{prompt}\nQuestion: {question}\n{options_text}\nAnswer:"
    print("Formatted Prompt:\n", formatted_prompt)  # Debug print to see the formatted prompt
    return formatted_prompt

# Function to tokenize input
def tokenize_input(formatted_prompt, tokenizer):
    return tokenizer(formatted_prompt, return_tensors='pt')['input_ids']

# Function to generate response from the model
def generate_response(input_ids, model, tokenizer, max_length=512):
    response = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    generated_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return generated_text

# Function to extract the answer from generated text
def extract_answer(generated_text):
    print("Generated text:", generated_text)  # Debug print to see the actual generated text
    # Find the last line that contains only a single uppercase letter
    match = re.findall(r'\b[A-D]\b', generated_text)
    return match[-1] if match else "No valid answer found"

# Process the first 100 questions and generate responses
results = []
num_processed = 0
for idx, row in df.iterrows():
    if idx >= 100:
        break

    question = row['Question']
    options = [row['Option A'], row['Option B'], row['Option C'], row['Option D']]
    prompt = row['Prompt']

    # Prepare the prompt
    formatted_prompt = prepare_prompt(question, options, prompt)

    # Tokenize the input
    input_ids = tokenize_input(formatted_prompt, tokenizer)

    # Generate a response
    generated_text = generate_response(input_ids, model, tokenizer)

    # Extract the answer
    predicted_answer = extract_answer(generated_text)

    # Append results
    results.append({
        'Question': question,
        'Formatted Prompt': formatted_prompt,
        'Generated Text': generated_text,
        'Predicted Answer': predicted_answer,
    })

    num_processed += 1

# Create a DataFrame from the results
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('predicted_answers_Mistral.csv', index=False)

print(f"Processed {num_processed} out of 100 questions")
print("Predicted answers saved to 'predicted_answers.csv'")


Formatted Prompt:
 You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.  **Question:** Which of the following mitigations involves preventing applications from running that haven't been downloaded from legitimate repositories?  **Options:** A) Audit B) Execution Prevention C) Operating System Configuration D) User Account Control  **Important:** The last line of your answer should contain only the single letter corresponding to the best option, with no additional text. 
Question: Which of the following mitigations involves preventing applications from running that haven't been downloaded from legitimate repositories?
Option A: Audit
Option B: Execution Prevention
Option C: Operating System Configuration
Option D: User Account Control
Answer:
Generated text: You are given a multiple-choice question (

In [6]:

import pandas as pd

# Load the Ground Truth data (TSV file)
gt_df = pd.read_csv('C:/Users/ACER/Desktop/stage 2024/mcq/cti-mcq.tsv', sep='\t')

# Load the Predicted Answer data (CSV file)
pred_df = pd.read_csv('C:/Users/ACER/Desktop/stage 2024/mcq/evaluation/mistral/predicted_answers (1).csv')

# Display the first few rows to verify
print(gt_df.head())
print(pred_df.head())



                                          URL  \
0  https://attack.mitre.org/techniques/T1548/   
1  https://attack.mitre.org/techniques/T1548/   
2  https://attack.mitre.org/techniques/T1548/   
3  https://attack.mitre.org/techniques/T1548/   
4  https://attack.mitre.org/techniques/T1548/   

                                            Question  \
0  Which of the following mitigations involves pr...   
1  Which data source is recommended for monitorin...   
2  What does mitigation ID M1028 suggest to preve...   
3  Which process creation is an indicator of pote...   
4  In a Linux environment, what is recommended to...   

                                    Option A  \
0                                      Audit   
1                                    Command   
2      Limiting privileges of cloud accounts   
3           C:\Windows\System32\services.exe   
4  Monitor Windows Registry Key Modification   

                                        Option B  \
0                          

In [7]:
# Merge the two DataFrames on the "Question" column (or another common identifier)
merged_df = pd.merge(gt_df, pred_df, on="Question", how="inner")

# Display the merged DataFrame to verify
print(merged_df.head())


                                          URL  \
0  https://attack.mitre.org/techniques/T1548/   
1  https://attack.mitre.org/techniques/T1548/   
2  https://attack.mitre.org/techniques/T1548/   
3  https://attack.mitre.org/techniques/T1548/   
4  https://attack.mitre.org/techniques/T1548/   

                                            Question  \
0  Which of the following mitigations involves pr...   
1  Which data source is recommended for monitorin...   
2  What does mitigation ID M1028 suggest to preve...   
3  Which process creation is an indicator of pote...   
4  In a Linux environment, what is recommended to...   

                                    Option A  \
0                                      Audit   
1                                    Command   
2      Limiting privileges of cloud accounts   
3           C:\Windows\System32\services.exe   
4  Monitor Windows Registry Key Modification   

                                        Option B  \
0                          

In [8]:
# Compare the predicted answer with the ground truth
correct_predictions = merged_df['Predicted Answer'] == merged_df['GT']

# Calculate accuracy as the percentage of correct predictions
accuracy = correct_predictions.mean() * 100

print(f'Accuracy: {accuracy:.2f}%')


Accuracy: 47.00%
