In [1]:
from IPython.display import HTML, display

def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
get_ipython().events.register('pre_run_cell', set_css)

In [86]:
from getpass import getpass
openapi_key = getpass('Enter OpenAI key: ')

Enter OpenAI key: ········


In [69]:
from langchain.document_loaders import JSONLoader, TomlLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
import json
import os
import zipfile

from pathlib import Path
from pprint import pprint

In [6]:
drive_path = "cti-ATT-CK-v13.0/enterprise-attack/"

In [9]:
%%time

tactic_data = []

folder_path = drive_path + 'x-mitre-tactic/'

print(folder_path)

for filename in os.listdir(folder_path):
  # print(filename)

  description_loader = JSONLoader(
    file_path=folder_path+filename,
    jq_schema='.objects[] | " \(.name) is the name of this tactic. The description of the \(.name) tactic is: \(.description). The Mitre Attack code for the \(.name) tactic is \(.external_references[].external_id)"'
  )
  tactic_data += description_loader.load()

cti-ATT-CK-v13.0/enterprise-attack/x-mitre-tactic/
CPU times: user 448 ms, sys: 13.4 ms, total: 461 ms
Wall time: 563 ms


In [10]:
%%time

# Append attack pattern to tactic

folder_path = drive_path + 'attack-pattern/'

print(folder_path)

for filename in os.listdir(folder_path):
  # print(filename)

  description_loader = JSONLoader(
    file_path=folder_path+filename,
    jq_schema='.objects[] | " \(.name) is the name of this attack pattern. The description of the \(.name) attack pattern is: \(.description). The Mitre Attack code for the \(.name) attack pattern is \(.external_references[].external_id)"'
  )
  # append to tactic
  tactic_data += description_loader.load()

cti-ATT-CK-v13.0/enterprise-attack/attack-pattern/
CPU times: user 21.8 s, sys: 306 ms, total: 22.1 s
Wall time: 22.2 s


In [11]:
%%time

malware_data = []

folder_path = drive_path + 'malware/'

print(folder_path)

for filename in os.listdir(folder_path):
  # print(filename)

  description_loader3 = JSONLoader(
    file_path=folder_path+filename,
    jq_schema='.objects[] | " \(.name) is the name of this malware. The description of the \(.name) malware is: \(.description). The Mitre Attack code for the \(.name) malware is \(.external_references[].external_id)"'
  )
  malware_data += description_loader3.load()

cti-ATT-CK-v13.0/enterprise-attack/malware/
CPU times: user 16.2 s, sys: 232 ms, total: 16.4 s
Wall time: 16.5 s


In [12]:
%%time

mitigation_data = []

folder_path = drive_path + 'course-of-action/'

print(folder_path)

for filename in os.listdir(folder_path):
  # print(filename)

  description_loader2 = JSONLoader(
    file_path=folder_path+filename,
    jq_schema='.objects[] | " \(.name) is the name of this course of action, also known as mitigation. The description of the \(.name) mitigation is: \(.description). The Mitre Attack code for the \(.name) mitigation is \(.external_references[].external_id)"'
  )
  mitigation_data += description_loader2.load()

cti-ATT-CK-v13.0/enterprise-attack/course-of-action/
CPU times: user 7.7 s, sys: 93.9 ms, total: 7.79 s
Wall time: 7.83 s


In [13]:
len(tactic_data)

3516

In [143]:
%%time

folder_path = 'rules1/integrations/gcp'

print(folder_path)
loader = TomlLoader(folder_path)
rules_data = loader.load()

rules1/integrations/gcp
CPU times: user 12 ms, sys: 4.45 ms, total: 16.4 ms
Wall time: 18.2 ms


In [115]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import os
embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

In [144]:
rules_vStore = Chroma.from_documents(rules_data, embeddings)

Using embedded DuckDB without persistence: data will be transient


In [28]:
tactic_vStore = Chroma.from_documents(tactic_data, embeddings)

Using embedded DuckDB without persistence: data will be transient


In [29]:
malware_vStore = Chroma.from_documents(malware_data, embeddings)

Using embedded DuckDB without persistence: data will be transient


In [30]:
mitigation_vStore = Chroma.from_documents(mitigation_data, embeddings)

Using embedded DuckDB without persistence: data will be transient


In [145]:
llm = OpenAI(temperature=0.5, openai_api_key=openapi_key)
chain = load_qa_chain(llm, chain_type="stuff")

In [118]:
query = "Describe Potential Defense Evasion via PRoot"
docs = rules_vStore.similarity_search(query, include_metadata=True, k=2)
chain.run(input_documents=docs, question=query)

' Potential Defense Evasion via PRoot is a rule that identifies the execution of the PRoot utility, an open-source tool for user-space implementation of chroot, mount --bind, and binfmt_misc. Adversaries can leverage this tool to expand the scope of their operations to multiple Linux distributions and simplify their necessary efforts. It also provides emulation capabilities that allow for malware built on other architectures, such as ARM, to be run. The post-exploitation technique called bring your own filesystem (BYOF) can be used by the threat actors to execute malicious payload or elevate privileges or perform network scans or orchestrate another attack.'

In [54]:
test_query = "What is lateral movement?"
pprint(tactic_vStore.similarity_search(query=test_query, t=4))

[Document(page_content=' DLL Side-Loading is the name of this attack pattern. The description of the DLL Side-Loading attack pattern is: Programs may specify DLLs that are loaded at runtime. Programs that improperly or vaguely specify a required DLL may be open to a vulnerability in which an unintended DLL is loaded. Side-loading vulnerabilities specifically occur when Windows Side-by-Side (WinSxS) manifests (Citation: MSDN Manifests) are not explicit enough about characteristics of the DLL to be loaded. Adversaries may take advantage of a legitimate program that is vulnerable to side-loading to load a malicious DLL. (Citation: Stewart 2014)\n\nAdversaries likely use this technique as a means of masking actions they perform under a legitimate, trusted system or software process.. The Mitre Attack code for the DLL Side-Loading attack pattern is null', metadata={'source': '/Users/kirtikirti/langchain/cti-ATT-CK-v13.0/enterprise-attack/attack-pattern/attack-pattern--b2001907-166b-4d71-bb3

## MVP Prompts

In [146]:
from langchain.chains import RetrievalQA
tactic_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=tactic_vStore.as_retriever(k=6, verbose=True))
malware_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=malware_vStore.as_retriever(k=6))
mitigation_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=mitigation_vStore.as_retriever(k=9))
rules_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=rules_vStore.as_retriever(k=2))

In [147]:


def run_queries(query):
    description = malware_qa.run(f"For the malware, '{scenario}', what is the definition?")
    summary = malware_qa.run(summary_template)
    number = malware_qa.run(f"What is the Mitre Attack code for the {mitre_type}, {scenario}? Return only this code.") 

In [148]:
def detection_rules(rule_name):
    #summary_template = rulesf"Describe {rule_name}. What is it's risk score and severity?"
    description = rules_qa.run(f"Describe {rule_name}.")
    techniques = rules_qa.run(f"What attack techniques are used for {rule_name}.")
    severity = rules_qa.run(f"What is the risk score and severity of {rule_name}.")
    mitigations = rules_qa.run(f"Give some mitigations for {rule_name}.")
    print (
    f"{description}\n\n", f"Attack techniques for {rule_name}: {techniques} \n\n", f"Severity of {rule_name}:{severity} \n\n", f"Possible mitigations of {rule_name}: {mitigations}",
    "\n\n\n\n\n",
      "Thanks for testing the WIP Elastic mitre buddy!")

In [149]:
def mitre_buddy(mitre_type, scenario):
    summary_template = f"For the {mitre_type}, {scenario}, can you summarize the attack techniques or malware techniques from the {mitre_type}'s definition? Keep it brief, keywords only."
    if mitre_type == "malware":
        description = malware_qa.run(f"For the malware, '{scenario}', what is the definition?")
        summary = malware_qa.run(summary_template)
        number = malware_qa.run(f"What is the Mitre Attack code for the {mitre_type}, {scenario}? Return only this code.")    
    elif mitre_type == 'tactic':
        description = tactic_qa.run(f"For the tactic, '{scenario}', what is the definition?")
        summary = tactic_qa.run(summary_template)
        number = tactic_qa.run(f"What is the Mitre Attack code for the {mitre_type}, {scenario}? Return only this code.")
    elif mitre_type == 'attack-pattern':
        description = tactic_qa.run(f"For the attack pattern, '{scenario}', what is the definition?")
        summary = tactic_qa.run(summary_template)
        number = tactic_qa.run(f"What is the Mitre Attack code for the {mitre_type}, {scenario}? Return only this code.")     
    else:
        description = "Custom answer: No matches."
    mitigations = mitigation_qa.run(f"What are the mitigations to protect against {summary}? Please list out the corresponding Mitre Attack codes of the mitigations.")
    if mitigations == "I don't know.":
        mitigations = "Sorry, but I don't know the answer! Go to the docs and tell the LangChain team!"
    print(
      f"{description} uses or is related to the keywords, {summary}",
      f"The code for {scenario}, if found, is {number}.",
      f"You asked how to mitigate the {mitre_type}, {scenario}. Here's how: \n\n",
      mitigations,
      "\n\n\n\n\n",
      "Thanks for testing the WIP Elastic mitre buddy!")

In [150]:
len(rules_data)

23

# Elastic Security Buddy

## Demo Area

In [127]:
rule_name = input()

defense evasion


In [136]:
detection_rules(rule_name)

 Defense evasion is a tactic used by adversaries to avoid detection and maintain access to a system. It can involve techniques such as hiding files and directories, masquerading, path interception, and renaming system utilities.

 Attack techniques for defense evasion:  T1211 - Exploitation for Defense Evasion and T1574 - Hijack Execution Flow. 

 Severity of defense evasion: I don't know. 

 Possible mitigations of defense evasion:  Mitigations for defense evasion include: monitoring system configurations for unexpected changes, maintaining least privilege access, and using application whitelisting to prevent unauthorized programs from running. Additionally, implementing endpoint detection and response (EDR) solutions can help detect and respond to defense evasion tactics. 




 Thanks for testing the WIP Elastic mitre buddy!


In [151]:
rule_name = input()

GCP Service Account Creation


In [152]:
detection_rules(rule_name)

 GCP Service Account Creation is a rule that identifies when a new service account is created in Google Cloud Platform (GCP). A service account is a special type of account used by an application or a virtual machine (VM) instance, not a person. Applications use service accounts to make authorized API calls, authorized as either the service account itself, or as G Suite or Cloud Identity users through domain-wide delegation. If service accounts are not tracked and managed properly, they can present a security risk. An adversary may create a new service account to use during their operations in order to avoid using a standard user account and attempt to evade detection.

 Attack techniques for GCP Service Account Creation:  The attack technique used for GCP Service Account Creation is T1136, Create Account. 

 Severity of GCP Service Account Creation: The risk score of GCP Service Account Creation is 21 and the severity is low. 

 Possible mitigations of GCP Service Account Creation:  V

In [None]:
mitre_type = input() # ["malware", "tactic", "attack-pattern"]
scenario = input()

In [94]:
mitre_buddy(mitre_type, scenario)

 The Credential Access tactic consists of techniques for stealing credentials like account names and passwords. Techniques used to get credentials include keylogging or credential dumping. Using legitimate credentials can give adversaries access to systems, make them harder to detect, and provide the opportunity to create more accounts to help achieve their goals. uses or is related to the keywords,  Keylogging, credential dumping. The code for Credential Access, if found, is  T1589.001. You asked how to mitigate the tactic, Credential Access. Here's how: 

  The mitigations to protect against Keylogging and credential dumping are Credential Dumping Mitigation (T1003), Valid Accounts Mitigation (T1078), and Credentials in Registry Mitigation (T1214). 




 Thanks for testing the WIP Elastic mitre buddy!
