In [None]:
# Jupyter notebook set-up
# install required python modules
# pip itself is upgraded first, before the package installs below run
!pip install --upgrade pip
# presumably the distribution that provides the SentinelUtils module imported below -- TODO confirm
!pip install --upgrade Azure-Sentinel-Utilities
# Kqlmagic is the IPython extension loaded later with %reload_ext to run %kql queries
!pip install Kqlmagic --upgrade
# networkx is imported below as nx and used to draw the process graph
!pip install networkx
# ipysheet: from_dataframe is imported from it below
!pip install ipysheet

# import required modules
import SentinelUtils, configparser
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ipywidgets as widgets
from ipysheet import from_dataframe
from IPython.display import display

# check Python version
SentinelUtils.version_management.ModuleVersionCheck().validate_python('3.6.0')

# retrieve credentials
config = configparser.ConfigParser()
config.read('config.ini')
tenant_id, subscription_id, resource_group = config['credentials']['tenant_id'], config['credentials']['subscription_id'], config['credentials']['resource_group']
workspace_id, workspace_name = config['credentials']['workspace_id'], config['credentials']['workspace_name']

# launch kwlmagic and authenticate into workspace
%reload_ext Kqlmagic
%kql loganalytics://code;workspace=workspace_id;tenant=tenant_id;alias="SentinelDB"

# Sentinel ATT&CK threat hunting tool

### killchain overview by affected machine

In [None]:
# functions set-up
def clean_row(row):
    """Return the truthy elements of a Kusto result row as a list.

    Every falsy value is dropped -- ``None`` and empty strings, but also
    ``0`` and ``False`` -- and the remaining elements keep their order.
    """
    return list(filter(None, row))

def find_tactic(row):
    """Return the first element of ``row`` that is an ATT&CK tactic name.

    Elements are matched exactly against the twelve tactic names listed
    below. ``None`` is returned when the row contains none of them.
    """
    tactic_names = (
        "Initial Access", "Execution", "Persistence", "Privilege Escalation",
        "Defense Evasion", "Credential Access", "Discovery", "Lateral Movement",
        "Collection", "Command and Control", "Exfiltration", "Impact",
    )
    return next((field for field in row if field in tactic_names), None)
        
def find_computer(row, computer_list):
    """Return the first element of ``row`` that appears in ``computer_list``.

    ``None`` is returned when no element of the row is a known computer name.
    """
    return next((field for field in row if field in computer_list), None)
        
# grab sysmon events
data = %kql Sysmon | where isnotempty(phase_name)

# grab list of computers generating sysmon data
computerData = %kql Sysmon | where isnotempty(phase_name) | summarize event_count = count() by Computer | sort by event_count desc

# build computer list
computers = [computer[0] for computer in computerData]

# prepare base dictionary
data_dict = {}
for computer in computers:
    data_dict[computer] = {"Initial Access":0,"Execution":0,"Persistence":0,"Privilege Escalation":0,"Defense Evasion":0,"Credential Access":0,"Discovery":0,"Lateral Movement":0,
                           "Collection":0,"Command and Control":0,"Exfiltration":0,"Impact":0}

# remove null/empty fields from Kusto data
clean_data = [clean_row(row) for row in data]

# create dataframe
out = widgets.Output(layout={'border': '1px solid black'})
out.append_stdout('Killchain overview by affected machine')
display(out)
for row in clean_data:
    data_dict[find_computer(row, computers)][find_tactic(row)] += 1
df = pd.DataFrame(data_dict)
display(df)

# display computer selection
out = widgets.Output(layout={'border': '1px solid black'})
out.append_stdout('Please select a target machine to begin inspecting commands')
display(out)
tables = list(computers)
selected_table = widgets.Dropdown(options=tables, value=tables[1],description='Select target:')
display(selected_table)

In [None]:
out = widgets.Output(layout={'border': '1px solid black'})
out.append_stdout('Overview of commands executed on target machine: {} (only logs with file_directory field are returned)'.format(selected_table.value))
display(out)

# return
v = str(selected_table.value)
data = %kql let _selection = v; Sysmon | where Computer contains _selection | where isnotempty(tostring(file_directory)) | summarize event_count=count() by tostring(file_directory) | sort by event_count desc
display(data)

df = data.to_dataframe()
process_table = [i for i in df["file_directory"]]
process_table = widgets.Dropdown(options=process_table, value=process_table[1],description='Inspect:')
display(process_table)

In [None]:
v = str(selected_table.value)
processes = %kql let _selection = v; Sysmon | where Computer contains _selection | where isnotempty(tostring(file_directory))| project file_directory, process_guid, event_creation_time

output_selection = []
for i in processes:
    if i[0] == process_table.value:
        output_selection.append(i[2])

if output_selection:
    timestamp_table = widgets.Dropdown(options=output_selection, value=output_selection[0],description='Timestamp:')
    display(timestamp_table)
else:
    out = widgets.Output(layout={'border': '1px solid black'})
    out.append_stdout('Could not find timestamps for selected command: {}'.format(process_table.value))
    display(out)

In [None]:
guid_list = []
for i in processes:
    if i[2] == timestamp_table.value:
        guid_list.append(i[1])
        
guid_value = str(guid_list[0])
target_logs = %kql let _guid = guid_value; Sysmon | where process_guid contains _guid
display(target_logs)
logs = target_logs.to_dataframe()

event_choice = widgets.Dropdown(options=[1], value=1,description='EventID:')
display(event_choice)

In [None]:
# functions set-up
def parse_event_1(log_row):
    """Pick the fields of interest out of a Sysmon EventID 1 log row.

    Fields are read by fixed position in ``log_row``. The returned dict maps
    TimeGenerated, Computer, User, ProcessParentGuid and Cmd to the values at
    positions 0, 3, 17, 22 and 15 respectively.
    """
    field_positions = (
        ("TimeGenerated", 0),
        ("Computer", 3),
        ("User", 17),
        ("ProcessParentGuid", 22),
        ("Cmd", 15),
    )
    return {field: log_row[position] for field, position in field_positions}

def grab_parent_process_logs(guid, event_id):
    """Grabs the parent process of a Sysmon log"""
    logs = %kql let _guid = guid; Sysmon | where process_guid contains _guid
    if logs:
        for log in logs:
            if log[2] == 1:
                tmp = clean_row(log)
                return parse_event_1(tmp)

def _show_banner(text):
    """Display ``text`` inside a bordered output widget."""
    banner = widgets.Output(layout={'border': '1px solid black'})
    banner.append_stdout(text)
    display(banner)


def _attribute_table(record):
    """Build the Timestamp/Attribute/Value table for one parsed process record."""
    timestamp = record.get("TimeGenerated")
    return pd.DataFrame(
        [[timestamp, "Computer", record.get("Computer")],
         [timestamp, "User", record.get("User")],
         [timestamp, "Command", record.get("Cmd")]],
        columns=['Timestamp', 'Attribute', "Value"])


# Grab event logs that correspond to EventID selection
# (when several rows have EventID 1, the last one wins)
event = {}
for log in target_logs:
    if log[2] == 1:
        event = parse_event_1(clean_row(log))

if not event:
    # Without an EventID 1 row there is no command line to report or draw; the
    # original code went on to query for the literal GUID "None" and then
    # crashed on None.split(). Report the situation and stop here instead.
    parent_data = None
    _show_banner('Could not find an EventID 1 log for the selected process')
else:
    # Collect parent process log
    parent_data = grab_parent_process_logs(str(event.get("ProcessParentGuid")), 1)

    # show full command lines instead of truncating them in the tables below
    pd.set_option('display.max_colwidth', 10000)

    # If parent process data is available return full process tree
    if parent_data:
        _show_banner("Parent process")
        parent = _attribute_table(parent_data)
        display(parent)
        _show_banner("Child process")
    # If parent process data is not available return child process tree only
    else:
        _show_banner('Could not find parent process for: {}'.format(event.get("Cmd")))

    baseline = _attribute_table(event)
    display(baseline)

    _show_banner('Visual process graph')
    # label nodes with the last backslash-separated component of the command line
    command = event.get("Cmd").split("\\")[-1]
    # node label -> colour: the selected process, its user and computer in yellow,
    # the parent process (when known) in red
    val_map = {event.get("Computer"): "yellow",
               event.get("User"): "yellow",
               command: "yellow"}
    if parent_data:
        parent_command = parent_data.get("Cmd").split("\\")[-1]
        val_map[parent_command] = "red"

    G = nx.Graph()

    # chain: computer - user - command (- parent command)
    G.add_node(event.get("Computer"))
    G.add_node(event.get("User"))
    G.add_node(command)
    G.add_edge(event.get("Computer"), event.get("User"), length=0.1)
    G.add_edge(event.get("User"), command, length=0.1)

    if parent_data:
        G.add_node(parent_command)
        G.add_edge(command, parent_command, length=0.1)

    values = [val_map.get(node) for node in G.nodes()]
    nx.draw(G, with_labels=True, node_color=values)
    plt.show()
