# 1. Imports and Installations

In [None]:
# Maximum number of tokens kept per training example; read by `tokenize` below.
CUTOFF_LEN=256
# NOTE(review): nothing visible in this notebook reads this module-level value --
# `evaluate_model_na` assigns its own local `max_length`. Confirm before relying on it.
max_length=512

Unsloth Library Installations

In [None]:
#Python Imports
import pandas as pd
import torch
import json
import transformers
import json
import random
import string

from datasets import load_dataset
from transformers import TrainingArguments,pipeline
from transformers.utils import logging


In [None]:
# CUDA compute capability of the GPU as (major, minor); the install cell below
# branches on `major_version` to decide which packages to install.
major_version, minor_version = torch.cuda.get_device_capability()

In [None]:
# Install Unsloth from GitHub, then the companion packages. Which package set is
# installed depends on `major_version` (set in the previous cell from the GPU's
# CUDA compute capability).
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-4260o2zk/unsloth_152591caffbd4b60a5a3193489ee8494
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-4260o2zk/unsloth_152591caffbd4b60a5a3193489ee8494

  Resolved https://github.com/unslothai/unsloth.git to commit 4211cc01409e3ced4f7abebaf68e244193b46e2c
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [None]:
#Unsloth Imports
from unsloth import FastLanguageModel
from trl import SFTTrainer

Library Imports

# 2. Functions to use

In [None]:
#Loading the trained models
def load_model_tokenizer(model_name):
    '''
    Load a model together with its tokenizer through Unsloth.

    The sequence length, dtype and 4-bit flag are not parameters: they are read
    from the notebook-level settings `max_seq_length`, `dtype` and `load_in_4bit`,
    which must already be defined when this function is called.

    Args:
      model_name: path/name of the model to load
    Returns:
      (model, tokenizer) tuple
    '''
    load_kwargs = dict(
        model_name=model_name,  # "unsloth/tinyllama" for 16bit loading
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
    )
    model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)
    return model, tokenizer

In [None]:
def generate_prompt(data_point):
    """Render one record as an Alpaca-style training prompt.

    Args:
        data_point: mapping with "instruction", "input" and "output" entries.
    Returns:
        The prompt text: a fixed preamble line followed by the Instruction, Input
        and Response sections (header line, then value line), ending with a newline.
    """
    preamble = (
        "Below is an instruction that describes a task, paired with an input that "
        "provides further context. Write a response that appropriately completes the request."
    )
    sections = (
        ("### Instruction:", data_point["instruction"]),
        ("### Input:", data_point["input"]),
        ("### Response:", data_point["output"]),
    )
    body = "".join(f"{header}\n{value}\n" for header, value in sections)
    return f"{preamble}\n{body}"

def tokenize(prompt, tokenizer, add_eos_token=True):
    """Tokenize a prompt for causal-LM training.

    The prompt is truncated to the notebook-level `CUTOFF_LEN` tokens without
    padding. When `add_eos_token` is set, the sequence does not already end with
    the tokenizer's EOS id, and there is still room below `CUTOFF_LEN`, one EOS
    token (with attention-mask value 1) is appended.

    Args:
        prompt: text to tokenize.
        tokenizer: callable tokenizer exposing `eos_token_id`.
        add_eos_token: whether an EOS token may be appended.
    Returns:
        The tokenizer output with an extra "labels" entry that is a copy of "input_ids".
    """
    encoded = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    token_ids = encoded["input_ids"]

    needs_eos = (
        token_ids[-1] != tokenizer.eos_token_id
        and len(token_ids) < CUTOFF_LEN
        and add_eos_token
    )
    if needs_eos:
        token_ids.append(tokenizer.eos_token_id)
        encoded["attention_mask"].append(1)

    # Labels equal the inputs: the whole prompt (response included) is the training target.
    encoded["labels"] = token_ids.copy()
    return encoded

def generate_and_tokenize_prompt(data_point, tokenizer):
    """Build the training prompt for `data_point` and return its tokenized form.

    Combines `generate_prompt` and `tokenize`; intended for use with `Dataset.map`.
    """
    return tokenize(generate_prompt(data_point), tokenizer=tokenizer)

In [None]:
#Creating custom dataset for training: Used the Alpaca Prompt template.
def custom_dataset(csv_path):
    '''
    Convert a CSV of techniques into the instruction/input/output records used
    for fine-tuning the LLM.

    The CSV must provide the columns 'ID', 'name', 'description' and 'detection'.
    Missing cells are treated as empty strings and every value is converted to
    text, so a row with e.g. no detection text no longer raises a TypeError when
    the prompt pieces are concatenated.

    Args:
      csv_path : filepath or name of the csv file

    Returns:
      dataset_data: list of dicts with the keys "instruction", "input" and "output"

    Side effects:
      - writes the records as JSON to "<csv_path>.json"
      - prints the first record (when there is one) and a confirmation message

    Raises:
      KeyError: if one of the required columns is missing from the CSV
    '''
    name = csv_path
    required_columns = ['ID', 'name', 'description', 'detection']

    df = pd.read_csv(csv_path)
    # pandas loads blank cells as float NaN; normalise to "" and force text so
    # the string concatenation below is always str + str.
    text_df = df[required_columns].fillna("").astype(str)

    instruction = "You are a cyber technique detector. Here are techniques with their definitions for you to learn from and identify technique name & id from a sentence."
    dataset_data = [
        {
            "instruction": instruction,
            "input": "ID: "+row_dict['ID']+" Name of the technique: "+row_dict['name']+ " Definition of the technique: "+ row_dict['description'] + " Detection Example: " + row_dict['detection'],
            # NOTE(review): ID and name are joined without a separator; kept as-is
            # so the training target format does not change. Confirm it is intended.
            "output": " Identified Technique with ID is: " +row_dict['ID'] + row_dict['name']
        }
        for row_dict in text_df.to_dict(orient='records')
    ]

    # An empty CSV yields no records; skip the sample print instead of raising IndexError.
    if dataset_data:
        print("Sample Data Row", dataset_data[0])

    with open(f"{name}.json", 'w') as f:
        json.dump(dataset_data, f)
        print("Data Written")
    return dataset_data

In [None]:
def train_model(data, model_nickname, model_name):
    """
    Fine-tune a language model with LoRA adapters on an instruction dataset.

    Args:
        data (str): Path (or paths) to a JSON file of records with "instruction",
            "input" and "output" fields, e.g. the file written by `custom_dataset`.
            The value is passed to `load_dataset("json", data_files=...)`, so it
            must reference a file on disk, not an in-memory list of dicts.
        model_nickname: Directory name under /teamspace/studios/this_studio/ that
            the trained model is saved to.
        model_name: Path/Name of the base model to load.
    Returns:
        The trained model (the PEFT-wrapped model returned by
        `FastLanguageModel.get_peft_model`, after `trainer.train()`).
    """
    max_seq_length = 4096
    dtype = None
    load_in_4bit = True

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )

    data_ = load_dataset("json", data_files = data)

    # test_size is an integer here, i.e. an absolute number of held-out examples.
    train_val = data_["train"].train_test_split(test_size=25, shuffle=True, seed=42)
    train_data = (train_val["train"].map(lambda x: generate_and_tokenize_prompt(x, tokenizer)))
    val_data = (train_val["test"].map(lambda x: generate_and_tokenize_prompt(x, tokenizer)))

    model = FastLanguageModel.get_peft_model(
        model=model,
        r=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=32,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing=False,
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )

    # Define TrainingArguments
    # Exactly one of fp16/bf16 is enabled, depending on what the GPU supports.
    # NOTE(review): no evaluation strategy is configured, so `eval_dataset` below is
    # presumably not evaluated during training -- confirm whether that is intended.
    training_args = TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_ratio=0.1,
        num_train_epochs=1,
        learning_rate=2e-5,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.1,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    )

    # Define DataCollator
    data_collator = transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    )

    trainer = transformers.Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        args=training_args,
        data_collator=data_collator
    )

    trainer.train()

    save_path = f'/teamspace/studios/this_studio/{model_nickname}'
    model.save_pretrained(save_path)
    print(f"Saved to path: '{save_path}'")

    return model

In [None]:
def generate_dummy_data(num_samples):
    """
    Build placeholder (sentence, attack ID) pairs that match no real technique.

    Args:
    - num_samples (int): Number of dummy data samples to generate.

    Returns:
    - list: `num_samples` tuples of the form
      ("This is a dummy sentence number <i>", "DUMMY_<5 random chars>"), with <i>
      counting from 1 and the random characters drawn from A-Z and 0-9.
    """
    id_alphabet = string.ascii_uppercase + string.digits
    return [
        (
            "This is a dummy sentence number " + str(position),
            "DUMMY_" + ''.join(random.choices(id_alphabet, k=5)),  # Generate a random attack ID
        )
        for position in range(1, num_samples + 1)
    ]


In [None]:
def evaluate_model_na(json_file, model, tokenizer):
    """
    Evaluate the model on a binary "does this sentence contain technique X?" task.

    Each real sentence is paired with its first mapped attack ID (expected answer
    "yes"). Dummy sentences with made-up IDs from `generate_dummy_data`, 20% of the
    real sample count, are added as negatives (expected answer "no"). A prediction
    is correct only when the model's yes/no reply matches that expected answer;
    replies with neither word count as wrong.

    Args:
        json_file (str): Path to a JSON file with a top-level "sentences" list whose
            items have "text" and "mappings" (each mapping has an "attack_id").
        model: Pre-trained language model exposing `generate`.
        tokenizer: Tokenizer associated with the language model.

    Returns:
        (accuracy, sentence_predictions): accuracy in percent (0.0 when there is
        nothing to evaluate) and a dict mapping each sentence to the lower-cased
        text the model generated for it.
    """
    # Load data from the JSON file
    with open(json_file, 'r') as f:
        data = json.load(f)

    # (sentence, attack_id, expected_answer). Sentences without any mapping have
    # no ground truth and are skipped instead of raising IndexError.
    original_data = [
        (sentence_data['text'], sentence_data['mappings'][0]['attack_id'], "yes")
        for sentence_data in data['sentences']
        if sentence_data.get('mappings')
    ]

    # Generate dummy data with reduced size to balance evaluation
    num_dummy_samples = int(len(original_data) * 0.2)  # 20% of the original data size
    dummy_data = [
        (sentence, attack_id, "no")
        for sentence, attack_id in generate_dummy_data(num_dummy_samples)
    ]

    # Combine and shuffle the evaluation data
    evaluation_data = original_data + dummy_data
    random.shuffle(evaluation_data)

    total_predictions = len(evaluation_data)
    if total_predictions == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0, {}

    correct_predictions = 0
    sentence_predictions = {}

    # Inputs must live on the same device as the model weights.
    device = getattr(model, "device", None)

    for sentence, attack_id, expected_answer in evaluation_data:
        # Format input for model generation
        prompt = f"### Instruction: Act as a machine that gives only binary output in Yes or No.\nDoes the sentence contain the MITRE ATTACK threat technique with ID {attack_id}?\nStrictly answer with Yes or No, skip any additional information.\n\n### Input:\n{sentence}\n\n### Response:"

        # Tokenize the prompt; truncation is requested explicitly rather than
        # relying on the tokenizer's implicit fallback when max_length is given.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)
        if device is not None:
            inputs = inputs.to(device)

        # Allow up to 10 tokens beyond the prompt, capped at 512 in total
        input_length = inputs.input_ids.size(1)
        max_length = min(512, input_length + 10)

        # Generate response from the model
        output = model.generate(**inputs, max_length=max_length)

        # The returned sequence starts with the prompt tokens; decode only what the
        # model added, so the reply is found whether or not it contains a newline.
        generated_tokens = output[0][input_length:]
        response_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip().lower()

        # Use the first whole word that is "yes" or "no" (so "not"/"know" do not match).
        words = (word.strip(string.punctuation) for word in response_text.split())
        predicted_answer = next((word for word in words if word in ("yes", "no")), None)

        if predicted_answer == expected_answer:
            correct_predictions += 1

        # Store the prediction for the sentence in the dictionary
        sentence_predictions[sentence] = response_text

    # Calculate accuracy
    accuracy = (correct_predictions / total_predictions) * 100

    return accuracy, sentence_predictions

# 3. Models used and description

In [None]:
# Settings read by `load_model_tokenizer` whenever a model is loaded.
max_seq_length = 4096  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# Every entry is named "unsloth/<model>-bnb-4bit"; more models at https://huggingface.co/unsloth
fourbit_models = [
    f"unsloth/{model_stem}-bnb-4bit"
    for model_stem in (
        "mistral-7b",
        "mistral-7b-instruct-v0.2",
        "llama-2-7b",
        "llama-2-13b",
        "codellama-34b",
        "tinyllama",
        "gemma-7b",  # New Google 6 trillion tokens model 2.5x faster!
        "gemma-2b",
    )
]

Models with Binary Class: TinyLlama and Mistral Ai

In [None]:
# Saved checkpoints to evaluate, keyed by the label used when reporting results
model_paths = dict(
    mistral='/teamspace/studios/this_studio/mistral',
    TinyLlama='/teamspace/studios/this_studio/model',
)

# Accuracy per model label, filled in by the loop below
accuracy_dict = {}

for model_name, model_path in model_paths.items():
    model, tokenizer = load_model_tokenizer(model_path)

    # Run the binary yes/no evaluation and keep the accuracy under the model's label
    accuracy, predictions = evaluate_model_na(
        json_file="test-training-data.json",
        model=model,
        tokenizer=tokenizer,
    )
    accuracy_dict[model_name] = accuracy

    # Drop both references so the model can be released before the next one loads
    model = tokenizer = None

# Print the accuracy dictionary
print(accuracy_dict)


==((====))==  Unsloth: Fast Mistral patching release 2024.4
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.

==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Unsloth 2024.4 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=32) and `max_length`(=92) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=32) and `max_length`(=102) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transfor

{'mistral': 98.46153846153847, 'TinyLlama': 13.846153846153847}


In [None]:
print(accuracy_dict)

{'mistral': 98.46153846153847, 'TinyLlama': 13.846153846153847}


Model 4a: Open AI

In [None]:
! pip install openai
! pip install chromadb
! pip install openai --upgrade openai migrate
! pip install openai==0.28

Collecting openai
  Downloading openai-1.28.0-py3-none-any.whl (320 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.1/320.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

In [None]:
import pandas as pd
import os
import json
import openai
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

In [None]:
# Load configuration from JSON file
def load_config(config_path='/content/config.json'):
    """Read the notebook's JSON configuration file.

    Args:
        config_path: Location of the JSON file. Defaults to the path this notebook
            has always used, so existing `load_config()` calls behave as before.
    Returns:
        The parsed JSON content (a dict for the config layout used here, which
        contains an 'openai_api_key' entry).
    Raises:
        FileNotFoundError: if the file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Read the API key from the config file, export it through the environment
# (ChromaDB's embedding function reads it from there later) and hand it to openai.
config = load_config()
os.environ['OPENAI_API_KEY'] = config['openai_api_key']
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Load the MITRE ATT&CK enterprise techniques spreadsheet and list its columns.
df = pd.read_excel("/content/enterprise-attack-v14.1-techniques.xlsx")
df.columns

Index(['ID', 'STIX ID', 'name', 'description', 'url', 'created',
       'last modified', 'domain', 'version', 'tactics', 'detection',
       'platforms', 'data sources', 'is sub-technique', 'sub-technique of',
       'defenses bypassed', 'contributors', 'permissions required',
       'supports remote', 'system requirements', 'impact type',
       'effective permissions', 'relationship citations'],
      dtype='object')

In [None]:
# Keep only the columns used further down: ID/name/description/tactics/detection
# build the ChromaDB documents, the remaining three become their metadata.
technique_df = df[["ID","name","description", "domain", "tactics","detection", "is sub-technique", "sub-technique of"]]
technique_df

Unnamed: 0,ID,name,description,domain,tactics,detection,is sub-technique,sub-technique of
0,T1548,Abuse Elevation Control Mechanism,Adversaries may circumvent mechanisms designed...,enterprise-attack,"Defense Evasion, Privilege Escalation",Monitor the file system for files that have th...,False,
1,T1548.002,Abuse Elevation Control Mechanism: Bypass User...,Adversaries may bypass UAC mechanisms to eleva...,enterprise-attack,"Defense Evasion, Privilege Escalation",There are many ways to perform UAC bypasses wh...,True,T1548
2,T1548.004,Abuse Elevation Control Mechanism: Elevated Ex...,Adversaries may leverage the <code>Authorizati...,enterprise-attack,"Defense Evasion, Privilege Escalation",Consider monitoring for <code>/usr/libexec/sec...,True,T1548
3,T1548.001,Abuse Elevation Control Mechanism: Setuid and ...,An adversary may abuse configurations where an...,enterprise-attack,"Defense Evasion, Privilege Escalation",Monitor the file system for files that have th...,True,T1548
4,T1548.003,Abuse Elevation Control Mechanism: Sudo and Su...,Adversaries may perform sudo caching and/or us...,enterprise-attack,"Defense Evasion, Privilege Escalation","On Linux, auditd can alert every time a user's...",True,T1548
...,...,...,...,...,...,...,...,...
620,T1102.002,Web Service: Bidirectional Communication,"Adversaries may use an existing, legitimate ex...",enterprise-attack,Command and Control,Host data that can relate unknown or suspiciou...,True,T1102
621,T1102.001,Web Service: Dead Drop Resolver,"Adversaries may use an existing, legitimate ex...",enterprise-attack,Command and Control,Host data that can relate unknown or suspiciou...,True,T1102
622,T1102.003,Web Service: One-Way Communication,"Adversaries may use an existing, legitimate ex...",enterprise-attack,Command and Control,Host data that can relate unknown or suspiciou...,True,T1102
623,T1047,Windows Management Instrumentation,Adversaries may abuse Windows Management Instr...,enterprise-attack,Execution,Monitor network traffic for WMI connections; t...,False,


In [None]:
# Labelled sentences for evaluation; the next cell reads its "text" and "label" columns.
single_label_df = pd.read_json("/content/single_label.json")

In [None]:
# Draw a fixed sample of 50 rows (random_state makes the draw repeatable) and
# split it into the model inputs and the expected technique IDs.
samples = single_label_df.sample(50, random_state = 1603)
test_sentences = samples["text"].tolist()
ground_truth = samples["label"].tolist()

In [None]:
def build_prompt(query_text):
  """Build the few-shot chat messages asking for the technique ID of `query_text`.

  Returns a two-item list: a system message describing the detector role and a
  user message with two worked examples followed by the text to classify.
  """
  system_message = {"role": "system", "content": "You are a threat technique detector for cyber threat intelligence department. Only identify technique id for the given text"}
  user_message = {"role": "user", "content": f"""
                Example:
                Text: 'Extra Window Memory Injection'
                T1055.011

                Text: 'Exfiltration Over C2 Channel'
                T1041

                End of examples. Identify technique ID for the following text:

                Text:
                {query_text}

                Technique ID
                """}
  return [system_message, user_message]


def identify_technique(query_text):
    """Ask gpt-3.5-turbo for the technique ID of each text.

    Args:
        query_text: iterable of texts to classify (one API call per text).
    Returns:
        List of the model's replies, in input order, with surrounding whitespace
        (including newlines), periods and commas removed.
    """
    responses = []
    # Query the OpenAI API
    for text in query_text:
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo',
            messages=build_prompt(text),
            # Near-deterministic decoding so repeated runs give comparable
            # accuracy (replaces the previously unused `seed_value`).
            temperature=0,
        )
        # Strip surrounding whitespace and punctuation; the dot inside an ID such
        # as T1055.011 is untouched because only the ends are trimmed.
        responses.append(response.choices[0].message.content.strip('., \t\r\n'))

    return responses


In [None]:
# Baseline predictions: one technique-ID reply per sampled sentence, without retrieved context.
predicted_val = identify_technique(test_sentences)

**Accuracy before ChromaDB**

In [None]:
def calculate_accuracy(ground_truth, predicted):
    """Return the percentage of positions where `predicted` equals `ground_truth`.

    Args:
        ground_truth: sequence of expected labels.
        predicted: sequence of predicted labels, same length as `ground_truth`.
    Returns:
        Accuracy in percent (0-100). Two empty sequences give 0.0 instead of
        raising ZeroDivisionError.
    Raises:
        ValueError: if the two sequences differ in length.
    """
    # Ensure both lists have the same length
    if len(ground_truth) != len(predicted):
        raise ValueError("Input lists must have the same length.")

    # Nothing to compare: report 0% rather than dividing by zero
    if len(ground_truth) == 0:
        return 0.0

    # Count the number of correct predictions
    correct_predictions = sum(1 for gt, pred in zip(ground_truth, predicted) if gt == pred)

    # Calculate accuracy
    return correct_predictions / len(ground_truth) * 100
# Score the baseline (no retrieved context) predictions by exact match against the labels.
accuracy = calculate_accuracy(ground_truth, predicted_val)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 20.00%


Model 4b: Open AI with ChromaDB

In [None]:
# Technique IDs that make up the retrieval corpus
technique_ids_to_extract = [
    'T1003.001', 'T1210', 'T1570', 'T1140', 'T1218.011', 'T1059.003',
    'T1057', 'T1518.001', 'T1106', 'T1082', 'T1016', 'T1078', 'T1047',
    'T1027', 'T1056.001', 'T1083', 'T1053.005', 'T1070.004', 'T1105',
    'T1090', 'T1005', 'T1574.002', 'T1071.001', 'T1484.001',
    'T1204.002', 'T1055', 'T1562.001', 'T1033', 'T1566.001', 'T1219',
    'T1547.001', 'T1021.001', 'T1543.003', 'T1569.002', 'T1036.005',
    'T1112', 'T1041', 'T1110', 'T1190', 'T1564.001', 'T1113',
    'T1573.001', 'T1095', 'T1552.001', 'T1012', 'T1074.001',
    'T1548.002', 'T1068', 'T1072', 'T1557.001'
]

# Select those techniques, then add two derived columns:
#   document - the text that gets embedded (name | description | tactics | detection)
#   metadata - one dict per row with the domain and sub-technique fields
extracted_df = (
    technique_df[technique_df['ID'].isin(technique_ids_to_extract)]
    .copy()
    .assign(
        document=lambda frame: frame['name'] + ' | ' + frame['description'] + ' | ' + frame['tactics'] + ' | ' + frame['detection'],
        metadata=lambda frame: frame[['domain', 'is sub-technique', 'sub-technique of']].to_dict(orient='records'),
    )
)


In [None]:
# In-memory ChromaDB client; its contents last only as long as this kernel
chroma_client = chromadb.Client() # Ephemeral by default

# Documents are embedded with OpenAI, using the key exported to the environment earlier
embedding_function = OpenAIEmbeddingFunction(api_key=os.getenv("OPENAI_API_KEY"))
technique_corpus_collection = chroma_client.create_collection(
    name='technique_corpus',
    embedding_function=embedding_function,
)

# One entry per selected technique, stored under its technique ID
technique_corpus_collection.add(
    ids=extracted_df['ID'].astype(str).tolist(),
    documents=extracted_df['document'].tolist(),
    metadatas=extracted_df['metadata'].tolist(),
)

In [None]:
# Retrieve 2 corpus entries per test sentence, returning their documents and distances;
# `query_result['documents']` is later passed as context to the prompt builder.
query_result = technique_corpus_collection.query(query_texts=test_sentences, include=['documents', 'distances'], n_results=2)

**Prompt using context from ChromaDB**

In [None]:
def build_prompt_with_context(query_text, context):
    """Build chat messages asking for a technique ID, grounded in retrieved context.

    Args:
        query_text: the text to classify.
        context: iterable of strings (retrieved documents); they are joined with
            single spaces and placed in the user message ahead of the question.
    Returns:
        A two-item list: the system message and the user message.
    """
    system_message = {'role': 'system', 'content': "You are a threat technique detector for cyber threat intelligence department. Only identify technique id for the given text based on following data. Dont generate any text with it"}
    user_content = f"""
The data is the following:

{' '.join(context)}

Identify technique id for the given text based on the given data. Just give ID
Text:
{query_text}

[Technique ID:]

"""
    return [system_message, {'role': 'user', 'content': user_content}]


def identify_technique_with_context(query_text, contexts):
    """Ask gpt-3.5-turbo for each text's technique ID, supplying retrieved context.

    Args:
        query_text: iterable of texts to classify.
        contexts: iterable, aligned with `query_text`, where each item is the list
            of context documents for the corresponding text. Pairs are formed with
            `zip`, so extra items on either side are ignored.
    Returns:
        List of the model's replies, in input order, with surrounding whitespace
        (including newlines), periods and commas removed.
    """
    responses = []
    # Query the OpenAI API
    for text, context in zip(query_text, contexts):
        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo',
            messages=build_prompt_with_context(query_text=text, context=context),
            # Near-deterministic decoding so repeated runs give comparable
            # accuracy (replaces the previously unused `seed_value`).
            temperature=0,
        )
        # Strip surrounding whitespace and punctuation; the dot inside an ID such
        # as T1055.011 is untouched because only the ends are trimmed.
        responses.append(response.choices[0].message.content.strip('., \t\r\n'))

    return responses

**Accuracy After ChromaDB**

In [None]:
# Re-run the classification, this time giving the model the documents retrieved
# from ChromaDB for each sentence.
predicted_val_with_context = identify_technique_with_context(test_sentences, query_result['documents'])
# NOTE: this rebinds `accuracy`, which earlier cells also assign.
accuracy = calculate_accuracy(ground_truth, predicted_val_with_context)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 30.00%


Conclusion:
1. TinyLlama is only a billion-parameter model, which is why it achieved a comparatively lower accuracy.
2. Mistral AI performed poorly on the multi-class classification problem, but it performed very well on the binary classification problem.
3. OpenAI's GPT performed poorly, even after using the ChromaDB vector database to supply context to the LLM; however, this model was not fine-tuned on any task-specific data, and the GPT API was used directly for the cyber threat identification task.