---
# 1. Data Ingestion ↵
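Start here by uploading the CSV file that contains the rule data. The later cells read the `name`, `description`, `severity`, `priority`, `detection_score`, `mitre_technique`, and `reference` columns.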

In [None]:
#@title Upload
from os import rename
from google.colab import files

# Fixed file names shared with the later cells: the uploaded rule export and
# the layer file that the last cell writes and downloads.
RULES = 'rules.csv'
LAYER = 'layer.json'

print('Select and upload the rule data')
uploaded = files.upload()  # keyed by the name of each uploaded file
if not uploaded:
  # Nothing was selected (e.g. the dialog was cancelled): fail with a clear
  # message instead of an IndexError on the empty mapping.
  raise RuntimeError('No file was uploaded; run this cell again and select the rules CSV')
# Only the first uploaded file is used; it is renamed so the later cells can
# always read it from the same path.
rename(next(iter(uploaded)), RULES)
print('🟢 Rule data uploaded successfuly')

---
# 2. Data Handling ✂
Some preparations to get the data ready to be processed.

In [None]:
#@title Data Load and Handling

from urllib.parse import urlparse
from re import compile

import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import seaborn as sns


def adjust_techniques(techniques):
  """Normalize a comma-separated technique list into a space-separated one.

  Every entry is stripped of surrounding whitespace, blank entries (for
  example the one produced by a trailing comma) are dropped, and duplicates
  are removed while keeping the order of first appearance, so the result is
  deterministic.

  Args:
    techniques: Comma-separated technique IDs, e.g. ``'T1059, T1003'``.

  Returns:
    The cleaned IDs joined by single spaces, or ``None`` when ``techniques``
    has no ``split`` method (e.g. ``None`` for a missing CSV value).
  """
  try:
    entries = techniques.split(',')
  except AttributeError:
    return None
  stripped = (entry.strip() for entry in entries)
  # dict.fromkeys de-duplicates like a set but keeps insertion order.
  return ' '.join(dict.fromkeys(entry for entry in stripped if entry))

def get_domain(url):
    """Return the network-location part (``netloc``) of a URL.

    Args:
        url: The reference URL. May be ``None`` or another non-string when
            the CSV cell was empty.

    Returns:
        The ``netloc`` component as ``str``, or ``''`` when ``url`` is not a
        string or cannot be parsed.
    """
    # urlparse(None) does not raise: it returns a bytes result, which would
    # put b'' instead of '' into the column. Reject non-strings up front.
    if not isinstance(url, str):
        return ''
    try:
        return urlparse(url).netloc
    except ValueError:
        # Raised for malformed URLs, e.g. an unbalanced bracketed IPv6 host.
        return ''

def int_score(score):
  """Translate a detection-score label into its numeric rank.

  The labels rank from 'basic' (1) up to 'excellent' (5). Any other value,
  including a missing one, maps to 0.
  """
  ranked_labels = ('basic', 'fair', 'good', 'very good', 'excellent')
  for rank, label in enumerate(ranked_labels, start=1):
    if score == label:
      return rank
  return 0

def int_grade(grade):
  """Translate a severity/priority label into its numeric rank.

  The labels rank from 'info' (1) up to 'critical' (5). Any other value,
  including a missing one, maps to 0.
  """
  ranked_labels = ('info', 'low', 'medium', 'high', 'critical')
  for rank, label in enumerate(ranked_labels, start=1):
    if grade == label:
      return rank
  return 0


# Load the uploaded rules and swap NaN cells for None.
df_rules = pd.read_csv(RULES).replace(np.nan, None)
# TODO: remove rules in staging here

# Textual grade columns and the function that turns each into a number.
grade_converters = {
    'severity': int_grade,
    'priority': int_grade,
    'detection_score': int_score,
}
# Lower-case every label first so the converters see normalized text.
for column in grade_converters:
  df_rules[column] = df_rules[column].str.lower()
# Then add one `<column>_int` companion column per grade.
for column, to_int in grade_converters.items():
  df_rules[f'{column}_int'] = df_rules[column].apply(to_int)

# Clean the technique list, upper-case it and cast every cell to str.
normalized_techniques = df_rules['mitre_technique'].apply(adjust_techniques)
normalized_techniques = normalized_techniques.str.upper()
df_rules['mitre_technique'] = normalized_techniques.astype(str)

# Domain of each rule's reference URL, used by the referred-domains chart.
df_rules['referred_domain'] = df_rules['reference'].apply(get_domain)
df_rules = df_rules.reset_index(drop=True)

# handling mitre data
# Maps each technique ID to the aggregated data of the rules that reference it:
# unique rule names, unique references, and one severity / priority /
# detection-score value per contributing rule.
techniques = dict()
re_techniques = compile(r'^T\d+(\.\d+)?(\s(T\d+(\.\d+)?))*$')  # matches T8888 | T8888.888 | T8888 T8888.888...

# The load step casts the column to str, so a missing value arrives here as one
# of these placeholder texts rather than as None/NaN.
missing_markers = ('', 'None', 'nan')

for index, rule in df_rules.iterrows():
  technique_text = str(rule['mitre_technique']).strip()
  if technique_text in missing_markers:
    print(f'🟡 {rule["name"]}: missing mitre_technique')
    continue
  if not re_techniques.search(technique_text):
    print(f'🔴 {rule["name"]}: malformed mitre_technique')
    continue
  for t in technique_text.split():
    entry = techniques.setdefault(t, {
        'names': [],
        'references': [],
        'severities': [],
        'priorities': [],
        'detection_scores': []
    })
    if rule['name'] not in entry['names']:
      entry['names'].append(rule['name'])
    # De-duplicate against the references list (not the names list).
    if rule['reference'] not in entry['references']:
      entry['references'].append(rule['reference'])
    # Scores are kept once per rule so the means are weighted by rule count.
    entry['severities'].append(rule['severity_int'])
    entry['priorities'].append(rule['priority_int'])
    entry['detection_scores'].append(rule['detection_score_int'])

# Per-technique statistics: the number of distinct rule names plus the mean of
# each score list, truncated to an int.
mean_sources = {
    'severity_mean': 'severities',
    'priority_mean': 'priorities',
    'detection_score_mean': 'detection_scores',
}
data = {
    'mitre_technique': list(techniques.keys()),
    'count': [len(details['names']) for details in techniques.values()],
}
for column, source in mean_sources.items():
  data[column] = [
      int(sum(details[source]) / len(details[source]))
      for details in techniques.values()
  ]
df_techniques = pd.DataFrame(data)

print('🟢 Data is ready')

---
# 3. Charts ◵
Visualizations and listings to represent data.

In [None]:
#@title Chart Style

# Select a Matplotlib style sheet through the Colab form and activate it.
# full list in matplotlib.style.available
style = 'dark_background'  #@param['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']
# NOTE(review): the seaborn-v0_8* names presumably need a recent Matplotlib release - confirm against the runtime.
plt.style.use(style)
print(f'🟢 Style set to `{style}`')

In [None]:
#@title Severities, Priorities, and Detection Scores

fig = plt.figure()

# One stacked panel per grade column: (column, y-axis label, x-axis label).
# Only the bottom panel carries the 'Count' x-axis label.
panels = (
    ('priority', 'Priority', None),
    ('severity', 'Severity', None),
    ('detection_score', 'Detection Score', 'Count'),
)
for position, (column, y_label, x_label) in enumerate(panels, start=1):
  panel_ax = fig.add_subplot(len(panels), 1, position)
  df_rules[column].value_counts().plot(kind='barh', ax=panel_ax)
  if x_label is not None:
    panel_ax.set_xlabel(x_label)
  panel_ax.set_ylabel(y_label)

fig.tight_layout()
plt.show()

In [None]:
#@title Referred Domains

# df_rules['referred_domain'].value_counts()
# for index,rule in df.iterrows():
#   print(f'{rule["name"]} - `{rule["reference"]}`')

# Horizontal bar chart of how many rules point at each reference domain.
domain_counts = df_rules['referred_domain'].value_counts()
ax_ref_domain = domain_counts.plot.barh(title='Referred Domains')
ax_ref_domain.set(xlabel='Count', ylabel='Domains')
plt.show()

In [None]:
#@title Word Cloud
# Colormap options follow the Matplotlib ones: https://matplotlib.org/stable/users/explain/colors/colormaps.html

from wordcloud import WordCloud, STOPWORDS
from PIL import Image

# import urllib.request
# urllib.request.urlretrieve(
#   'https://domain/path/to/image',
#   'nu-cloud.png')


# Mask image opened when `wc_mask` is enabled; the file must exist by then.
SHAPE = 'wc-shape.png'
# Named colours (RRGGBBAA hex, without the leading '#') offered as backgrounds.
nu_palette = {
    'The Purple': '820AD1FF',
    'The Purple Sur Ton': 'AA68FFFF',
    'Grey': 'E4E4E4FF',
    'Off-White': 'F4F4F4FF',
    'White': 'FFFFFFFF',
    'Black': '000000FF'
}
# Colab form fields controlling the word cloud.
wc_source = 'description'  #@param['description','mitre_technique']
wc_mask = False             #@param['True','False']{type:'raw'}
wc_bg_color = 'Black'      #@param['The Purple','The Purple Sur Ton','Grey','Off-White','White','Black']
wc_colormap = 'Purples'    #@param['Purples','viridis','binary','cool','PRGn','Paired','tab20b','tab20c','rainbow']
wc_max_words = 120         #@param{type:"slider",min:20,max:200,step:10}
wc_contour_width = 0       #@param{type:"slider",min:0,max:10,step:1}
wc_dpi = 100               #@param{type:"slider",min:100,max:300,step:100}

if wc_mask:
  # Read the mask inside a context manager so the image file is closed again.
  with Image.open(SHAPE) as shape_image:
    wc_mask_shape = np.array(shape_image)
else:
  wc_mask_shape = None
# Extra words ignored on top of the library's default STOPWORDS.
wc_stopwords = ['rule', 'rules', 'based', 'detected', 'many', 'someone']

wc = WordCloud(
    width=1920,
    height=1080,
    stopwords=wc_stopwords+list(STOPWORDS),
    collocations=True,
    max_words=wc_max_words,
    background_color=f'#{nu_palette[wc_bg_color]}',
    colormap=wc_colormap,
    mask=wc_mask_shape,
    contour_width=wc_contour_width,
    contour_color='#AA68FFFF'
    )
# Rows without a value would make str.join raise a TypeError, so they are
# dropped before the text is assembled.
wc_text = ' '.join(df_rules[wc_source].dropna().astype(str).str.lower())
wc.generate_from_text(wc_text)

plt.figure(figsize=(16,9), dpi=wc_dpi)
plt.axis('off')
plt.imshow(wc, interpolation='bilinear')
plt.show()

In [None]:
#@title MITRE ATT&CK Distribution

# group techniques in tactics then plot a chart with number of rules per tactics



# ax_mitre_count = df_techniques.sort_values('count', ascending=False).head(10).plot(kind='barh', x='count', y='mitre_technique', title='MITRE ATT&CK Distribution')
# ax_ref_domain.set_xlabel('Mitre')
# ax_ref_domain.set_ylabel('Domains')
# plt.show()

---
# 4. MITRE Navigator Layer ⏧
Generate a [MITRE Navigator](https://mitre-attack.github.io/attack-navigator/) 🔗 layer based on the loaded data.

In [None]:
#@title Layer Setup

# Colab form fields for the Navigator layer; the layer dictionaries below are
# built from these values.

# Versions written to the layer's `versions` entry.
mitre_attack_version = '14'           #@param{type:'string'}
mitre_navigator_version = '4.9.1'     #@param{type:'string'}
mitre_navigator_layer_format = '4.5'  #@param{type:'string'}

# Layer name and description. `source` is not referenced by the cells shown in
# this notebook.
name = 'SIEM'                         #@param{type:'string'}
source = 'SIEM'                       #@param{type:'string'}
description = 'SIEM Rules'            #@param{type:'string'}

# Tactic row background toggle and colour (hex without the leading '#').
tactic_background = True              #@param['True','False']{type:'raw'}
tactic_hexcolor = '820AD1FF'          #@param{type:'string'}

# Per-technique settings. `technique_hexcolor` is only used by the 'color'
# entry that is commented out in the technique template.
technique_hexcolor = '820AD1FF'       #@param{type:'string'}
technique_comment = 'SIEM assesment'  #@param{type:'string'}
technique_show_sub = False            #@param['True','False']{type:'raw'}

# Three gradient colours plus the score range they span.
# alt: d62d20ff (1-red), ffa700ff (2-orange), 008744ff (3-green)
gradient_color_1 = 'F4F4F4FF'         #@param{type:'string'}
gradient_color_2 = 'AA68FFFF'         #@param{type:'string'}
gradient_color_3 = '820AD1FF'         #@param{type:'string'}
gradient_min_value = 1                #@param{type:'integer'}
gradient_max_value = 5                #@param{type:'integer'}

# Which per-technique score list feeds the technique score in the layer builder.
extra_dimension = 'detection_scores'  #@param['severities','priorities','detection_scores']

# Version triple stored under the layer's 'versions' key.
layer_versions = {
    'attack': mitre_attack_version,
    'navigator': mitre_navigator_version,
    'layer': mitre_navigator_layer_format,
}
# Score gradient: three '#'-prefixed colours and the value range they span.
layer_gradient = {
    'colors': [
        f'#{hexcolor}'
        for hexcolor in (gradient_color_1, gradient_color_2, gradient_color_3)
    ],
    'minValue': gradient_min_value,
    'maxValue': gradient_max_value,
}
# Layer skeleton; the layer builder cell fills in 'techniques'.
layer = {
    'name': name,
    'versions': layer_versions,
    'domain': 'enterprise-attack',
    'description': description + f' (scores based on: {extra_dimension})',
    'showTacticRowBackground': tactic_background,
    'tacticRowBackground': f'#{tactic_hexcolor}',
    'gradient': layer_gradient,
    'techniques': [],
}
# Template copied once per technique by the layer builder cell.
template_technique = dict(
    techniqueID='',
    # 'color': f'#{technique_hexcolor}',
    comment=technique_comment,
    metadata=[],
    links=[],
    enabled=True,
    showSubtechniques=technique_show_sub,
)

print('🟢 Layer setup complete')

In [None]:
#@title Layer Builder

from copy import deepcopy
from json import dump

# Build one layer entry per aggregated technique, write the layer to LAYER as
# JSON and offer it for download.
flatten_techniques = list()  # list of dicts, each dict is a technique

for t in techniques:
  technique = deepcopy(template_technique)
  technique['techniqueID'] = t
  technique['metadata'] = [{'name':'rule','value':x} for x in techniques[t]['names']]
  # A missing reference (None/NaN) has no URL to link to, and a reference
  # shared by several rules needs only one link: keep unique, non-empty
  # strings in their original order.
  unique_references = dict.fromkeys(
      x for x in techniques[t]['references'] if isinstance(x, str) and x
  )
  technique['links'] = [{'label':'reference','url':x} for x in unique_references]
  # Score is the mean of the selected dimension, truncated to an int.
  dimension_values = techniques[t][extra_dimension]
  technique['score'] = int(sum(dimension_values) / len(dimension_values))
  flatten_techniques.append(technique)

layer['techniques'] = flatten_techniques

with open(LAYER, 'w') as f:
  dump(layer, f)

print('🟢 Layer is ready to roll')
files.download(LAYER)