In [3]:
import json
from chromadb import Client
from chromadb.config import Settings
from llama_index.core import Document
import chromadb.utils.embedding_functions as embedding_functions

# Load the threat catalogue. The code below iterates it with .items(), so the
# top level is expected to be a JSON object mapping threat id -> threat entry.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default locale encoding.
with open('mitre_fight.json', encoding='utf-8') as f:
    raw_data = json.load(f)

# Repeated sections of a threat entry that are flattened into the text.
# Each tuple is (JSON key, label used in the text, field used as the title).
# Note that "Detection" entries are titled by their 'id', not their 'name'.
_SECTIONS = [
    ("Procedure Examples", "Procedure Example", "name"),
    ("Detection", "Detection", "id"),
    ("Critical Assets", "Critical Asset", "name"),
    ("Pre-Conditions", "Pre-condition", "name"),
    ("Post-Conditions", "Post-condition", "name"),
    ("Mitigations", "Mitigation", "name"),
    # ("References", "Reference", "name"),  # disabled: references are not embedded
]


def _threat_to_text(threat_id, content):
    """Flatten one threat entry into a single newline-separated text block.

    Args:
        threat_id: Key of the entry in the loaded JSON object.
        content: Dict holding the entry's fields; missing scalar fields are
            rendered as the string "None", missing or null sections are skipped.

    Returns:
        The text that is embedded for this threat.
    """
    lines = [
        f"ID: {threat_id}",
        f"Name: {content.get('Name')}",
        f"Description: {content.get('Description')}",
        f"Platform: {content.get('Platform')}",
        f"Tactics: {content.get('Tactics')}",
    ]
    for key, label, title_field in _SECTIONS:
        # `or []` also covers a section that is present but explicitly null.
        for entry in content.get(key) or []:
            lines.append(
                f"{label} - {entry.get(title_field)}: {entry.get('description')}"
            )
    return "\n".join(lines)


# Flatten each threat entry into a document plus its metadata.
docs = []
metadatas = []

for threat_id, content in raw_data.items():
    docs.append(_threat_to_text(threat_id, content))
    metadatas.append({"id": threat_id, "name": content.get("Name")})

# Built once, after the loop: previously this comprehension sat inside the loop
# body, so it was recomputed for every entry and never defined for empty input.
llamaindex_docs = [
    Document(text=text, metadata=meta)
    for text, meta in zip(docs, metadatas)
]


In [4]:
import chromadb
from llama_index.core import Document, VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model handed to the index.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en")

# On-disk Chroma store; the collection is created on first use and reopened
# on later runs.
chroma_client = chromadb.PersistentClient(path='mitre_chroma')
chroma_collection = chroma_client.get_or_create_collection('mitre')
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

if chroma_collection.count() > 0:
    # The collection persists across kernel restarts. Calling from_documents
    # again would embed and insert every document a second time, so reuse the
    # stored vectors instead.
    # NOTE: delete the 'mitre_chroma' directory to force a rebuild after
    # mitre_fight.json or the embedding model changes.
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )
else:
    # First run: embed all documents and write them into the collection.
    index = VectorStoreIndex.from_documents(
        llamaindex_docs,
        storage_context=storage_context,
        embed_model=embed_model,
    )

In [None]:
# Free-text query passed to the retriever: an attack description followed by
# the question to answer about it.
# NOTE(review): "seque ul" / "ocue ul" look like subscript/superscript notation
# flattened during copy-paste — confirm whether they should be cleaned up
# before the text is embedded.
query = '''
The following is a description of the operational modality of a 5G attack:

In this attack, the adversary exploits the lack of rate-limiting on the number of failed security mode procedures to desynchronize the UE's uplink NAS counters. The impacts of the attack are prolonged desynchronization and denial of service (DoS) between the UE and the AMF. The adversary knows the victim UE's C-RNTI and can inject an arbitrary number of invalid sec_mode_command messages to the victim device, which induces the UE to send sec_mode_reject messages as a response while incrementing its uplink sequence number (seque ul) and overflow counter (ocue ul). As a result of this desynchronization, the legitimate AMF will discard any uplink messages sent from the victim UE, allowing the adversary to carry out a prolonged DoS and service disruption.

What MITRE Fight mitigations can be applied to this attack? Please provide a list of mitigations, descriptions and details.
'''

In [30]:
# Build a retriever over the index that returns the 4 top-scoring documents
# (similarity_top_k=4), then run the query defined in the cell above.
retriever = index.as_retriever(similarity_top_k=4)
# Alternative short query kept for quick experiments:
# query = "exploits temporary identifiers on layer two during the radio connection establishment"
res = retriever.retrieve(query)

In [31]:
# Print every retrieved hit: its metadata and score (4 decimals) on one line,
# then the full document text followed by an 80-character rule.
rule = "\n" + "=" * 80 + "\n"
for hit in res:
    score_label = "-- {:.4f}".format(hit.score)
    print(hit.metadata, score_label)
    print(hit.text, rule)

{'id': 'FGT1498.503', 'name': 'UE DoS to AMF'} -- 0.7680
ID: FGT1498.503
Name: UE DoS to AMF
Description: An adversary controlled UE may be used to send crafted NAS messages to AMF to crash or slow down the AMF.
Platform: arch-control plane
Tactics: Impact
Procedure Example - AMF is targeted from UE using malformed NAS message: Adversary controlled UE sends a crafted NAS message towards AMF with the length field increased to a very large value. If AMF does not do proper header parameters check including length check, it can cause buffer overflow in the AMF which can force AMF to go to an undefined state or crash. This will cause Denial of Service for existing and future UEs. [1,2,3]
Detection - DS0029: Examine all header fields of control plane messages received in the uplink direction from UE to the core.
This can be done either by logging all messages received by the NF or by using a proxy or firewall at the core network entry point.
Detection - FGDS5015: Test all software patches fo