# Imports

In [1]:
import os
import pandas as pd
import yaml
from tqdm import tqdm
import json
from collections import defaultdict

# Load Sigma Rules

In [2]:
# count number of rules in sigma_rules directory and its subdirectories
sigma_rules_list = []

for root, dirs, files in os.walk('sigma_rules'):
    for file in files:
        if file.endswith(('.yml', '.yaml')):
            sigma_rules_list.append(os.path.join(root, file))
        else:
            print(f'Not a sigma rule -> {file}')
            
print(f'Number of sigma rules: {len(sigma_rules_list)}')

Not a sigma rule -> README.md
Number of sigma rules: 2832


# Lateral Movement Sigma Rules

### By File Name

In [3]:
lateral_movement_rules = [rule for rule in sigma_rules_list if 'lateral_movement' in rule]
lateral_movement_rules

['sigma_rules\\application\\rpc_firewall\\rpc_firewall_atsvc_lateral_movement.yml',
 'sigma_rules\\application\\rpc_firewall\\rpc_firewall_itaskschedulerservice_lateral_movement.yml',
 'sigma_rules\\application\\rpc_firewall\\rpc_firewall_printing_lateral_movement.yml',
 'sigma_rules\\application\\rpc_firewall\\rpc_firewall_remote_registry_lateral_movement.yml',
 'sigma_rules\\application\\rpc_firewall\\rpc_firewall_remote_service_lateral_movement.yml',
 'sigma_rules\\application\\rpc_firewall\\rpc_firewall_sasec_lateral_movement.yml',
 'sigma_rules\\windows\\process_creation\\proc_creation_win_hktl_impacket_lateral_movement.yml',
 'sigma_rules\\windows\\process_creation\\proc_creation_win_mmc_mmc20_lateral_movement.yml',
 'sigma_rules\\windows\\process_creation\\proc_creation_win_office_excel_dcom_lateral_movement.yml',
 'sigma_rules\\windows\\process_creation\\proc_creation_win_susp_copy_lateral_movement.yml']

### By Tags - Tactics and Techniques

In [4]:
lm_techniques = ['t1210', 't1534', 't1570', 
                 't1563', # 't1563.001', 't1563.002', 
                 't1021', # 't1021.001', 't1021.002', 't1021.003', 't1021.004', 't1021.005', 't1021.006', 't1021.007', 't1021.008',
                 't1091', 't1072', 't1080', 
                 't1550'] #, 't1550.001', 't1550.002', 't1550.003', 't1550.004']

In [5]:
lm_rules_by_tags = []

for sigma_rule in sigma_rules_list:
    with open(sigma_rule, 'r', encoding='utf-8') as f:
        rule_content = yaml.safe_load(f)
        if 'tags' in rule_content.keys():
            for item in rule_content['tags']:
                if 'lateral_movement' in item or any(technique in item for technique in lm_techniques):
                    lm_rules_by_tags.append(sigma_rule)
                    break

In [6]:
len(lm_rules_by_tags)

129

# Load Provenance Data
With generated attack simulation data

In [7]:
artifacts_df = pd.read_csv('../lm_att_simulation/prov_with_generated_artifacts.csv', index_col=0)
processes_df = pd.read_csv('../lm_att_simulation/prov_with_generated_processes.csv', index_col=0)
edges_df = pd.read_csv('../lm_att_simulation/prov_with_generated_edges.csv', index_col=0)

# Rules Keys and Values

In [8]:
chosen_main_keys = ['title', 'description', 'logsource', 'detection', 'fields']  # and all of their sub-keys

In [9]:
sigma_rules_all_keys_to_rule_names = {}

for sigma_rule in sigma_rules_list:
    with open(sigma_rule, 'r', encoding='utf-8') as f:
        rule_content = yaml.safe_load(f)
        
        rule_name = os.path.basename(sigma_rule)  # Extract rule name from file path
        
        for key, value in rule_content.items():
            if key in chosen_main_keys:
                if key not in sigma_rules_all_keys_to_rule_names.keys():
                    sigma_rules_all_keys_to_rule_names[key] = {rule_name}
                else:
                    sigma_rules_all_keys_to_rule_names[key].add(rule_name)
                
                if isinstance(value, dict):
                    for sub_key in value.keys():
                        if sub_key not in sigma_rules_all_keys_to_rule_names.keys():
                            sigma_rules_all_keys_to_rule_names[sub_key] = {rule_name}
                        else:
                            sigma_rules_all_keys_to_rule_names[sub_key].add(rule_name)
                            
                        if isinstance(value[sub_key], dict):
                            for sub_sub_key in value[sub_key].keys():
                                if sub_sub_key not in sigma_rules_all_keys_to_rule_names.keys():
                                    sigma_rules_all_keys_to_rule_names[sub_sub_key] = {rule_name}
                                else:
                                    sigma_rules_all_keys_to_rule_names[sub_sub_key].add(rule_name)
                                    
                                # No Sub-sub-sub-key

## Rules Values With Rules Names Attached

In [10]:
def add_value_and_rule_no_duplicates(value_dict, value, rule_name):
    if value != "" and value != " " and value != "[]":
        if value not in value_dict.keys():
            value_dict[value] = {rule_name}
        else:
            value_dict[value].add(rule_name)

def parse_value_recursively(value_dict, value, rule_name):
    if isinstance(value, list) and value != []:
        for item in value:
            parse_value_recursively(value_dict, item, rule_name)
    elif isinstance(value, dict):
        for item in value.values():
            parse_value_recursively(value_dict, item, rule_name)
    elif isinstance(value, str):
        add_value_and_rule_no_duplicates(value_dict, value, rule_name)

In [11]:
sigma_rules_values_to_names = {}

for sigma_rule in sigma_rules_list:
    with open(sigma_rule, 'r', encoding='utf-8') as f:
        rule_content = yaml.safe_load(f)

    rule_name = os.path.basename(sigma_rule)  # Extract rule name from file path

    for key, rule_value in rule_content.items():
        if key in sigma_rules_all_keys_to_rule_names.keys():
            parse_value_recursively(sigma_rules_values_to_names, rule_value, rule_name)

In [12]:
sigma_rules_values_to_names

{'Django Framework Exceptions': {'appframework_django_exceptions.yml'},
 'stable': {'app_python_sql_exceptions.yml',
  'appframework_django_exceptions.yml',
  'appframework_ruby_on_rails_exceptions.yml',
  'av_exploiting.yml',
  'av_hacktool.yml',
  'av_password_dumper.yml',
  'aws_ec2_disable_encryption.yml',
  'aws_securityhub_finding_evasion.yml',
  'create_remote_thread_win_susp_password_dumper_lsass.yml',
  'default_credentials_usage.yml',
  'file_event_lnx_doas_conf_creation.yml',
  'host_without_firewall.yml',
  'image_load_thor_unsigned_execution.yml',
  'lnx_auditd_dd_delete_file.yml',
  'lnx_auditd_password_policy_discovery.yml',
  'lnx_auditd_system_info_discovery2.yml',
  'lnx_buffer_overflows.yml',
  'lnx_clamav_relevant_message.yml',
  'lnx_file_copy.yml',
  'net_connection_lnx_crypto_mining_indicators.yml',
  'net_connection_win_crypto_mining_pools.yml',
  'net_dns_pua_cryptocoin_mining_xmr.yml',
  'net_firewall_cleartext_protocols.yml',
  'netflow_cleartext_protocols.ym

In [13]:
# Save dictionary to file, 1st column is the key, 2nd column is the value

# Convert sets to lists because JSON does not support set data type
sigma_rules_values_with_names_lists = {k: list(v) for k, v in sigma_rules_values_to_names.items()}

with open('sigma_rules_values_with_names.json', 'w') as f:
    json.dump(sigma_rules_values_with_names_lists, f)

# Match Data to Rule Values

In [14]:
def get_matched_sigma_values(df, values_to_rules_dict, common_values_to_ignore=[]):
    match_values = []

    for _, row in df.iterrows():
        for _, col_value in row.items():
            if (pd.notna(col_value) and isinstance(col_value, str) and not col_value.isdigit() and
                    any(str(col_value) in rule_value for rule_value in values_to_rules_dict.keys()) and
                    col_value not in common_values_to_ignore and
                    col_value not in match_values):
                
                match_values.append(col_value)
                
    return match_values

In [15]:
def get_matches_by_sigma_values(prov_df, prov_type, values_to_rules_dict, common_values_to_ignore=[]):
    matches = []

    for _, row in prov_df.iterrows():
        for _, col_value in row.items():
            
            if (pd.notna(col_value) and isinstance(col_value, str) and not col_value.isdigit() and
                    col_value not in common_values_to_ignore):
                
                matched_rules = []
                
                # Initial intuition:
                # for _, row in df.iterrows():
                #     for _, col_value in row.items():
                #         if (pd.notna(col_value) and isinstance(col_value, str) and not col_value.isdigit() and
                #             any(str(col_value) in rule_value for rule_value in sigma_rules_values_to_names.keys()) and
                #             col_value not in common_values and
                #             col_value not in matches):
                for rule_value in values_to_rules_dict.keys():
                    if col_value in rule_value:
                        matched_rules.extend(values_to_rules_dict[rule_value])
                        
                if matched_rules:
                    prov_data = None
                    if prov_type == 'Artifact':
                        prov_data = [int(row['remote port']) if pd.notna(row['remote port']) else 'None', 
                                     int(row['local port']) if pd.notna(row['local port']) else 'None']
                    elif prov_type == 'Process':
                        prov_data = [int(row['pid']) if pd.notna(row['pid']) else 'None', 
                                     int(row['ppid']) if pd.notna(row['ppid']) else 'None']
                    
                        
                    match = {
                        'matched_value': col_value,
                        'svc': row['svc'],
                        'matched_rules': matched_rules,
                        'prov_data': prov_data
                    }
                    matches.append(match) 
            
    return matches

In [16]:
artifact_common_values = ['Artifact', 'syscall', 'memory', 'file']
process_common_values = ['Process', 'syscall']
edge_common_values = ['Used', 'syscall', 'accept', 'close', 'execve', 'load', 'open', 'mmap (write)', 'clone', 'connect', 'exit', 'mprotect']

In [17]:
# Get matched values for artifacts              
artifact_sigma_values = get_matched_sigma_values(artifacts_df, sigma_rules_values_to_names, artifact_common_values)
print(artifact_sigma_values)

['0.0.0.0', 'tcp', '127.0.0.1', 'udp', 'external', 'memory map', 'memory dump', '/etc/sudoers']


In [18]:
artifacts_matches = get_matches_by_sigma_values(artifacts_df, 'Artifact', sigma_rules_values_to_names, artifact_common_values)
artifacts_matches

[{'matched_value': '0.0.0.0',
  'svc': 'redis-cart',
  'matched_rules': ['zeek_dns_mining_pools.yml',
   'net_connection_lnx_back_connect_shell_dev.yml',
   'win_security_successful_external_remote_rdp_login.yml',
   'proxy_webdav_search_ms.yml',
   'zeek_http_webdav_put_request.yml',
   'win_security_successful_external_remote_smb_login.yml'],
  'prov_data': [39612, 6379]},
 {'matched_value': 'tcp',
  'svc': 'redis-cart',
  'matched_rules': ['lnx_auditd_bpfdoor_port_redirect.yml',
   'lnx_auditd_network_sniffing.yml',
   'lnx_apt_equationgroup_lnx.yml',
   'lnx_apt_equationgroup_lnx.yml',
   'lnx_apt_equationgroup_lnx.yml',
   'lnx_shell_susp_commands.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_susp_dev_tcp.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_shell_susp_rev_shells.yml',
   'lnx_susp_dev_tcp.yml',
   'lnx_susp_dev_tcp.yml',


In [19]:
# Get matched values for processes
process_sigma_values = get_matched_sigma_values(processes_df, sigma_rules_values_to_names, process_common_values)
print(process_sigma_values)

['/app', 'external', '/tmp', 'linpeas', 'python3', '/bin/sh', 'sh', '/etc', 'sudo', '/bin/bash', 'bash']


In [20]:
processes_matches = get_matches_by_sigma_values(processes_df, 'Process', sigma_rules_values_to_names, process_common_values)
processes_matches

[{'matched_value': '/app',
  'svc': 'cartservice',
  'matched_rules': ['proc_creation_lnx_process_discovery.yml',
   'proc_creation_macos_suspicious_applet_behaviour.yml',
   'proc_creation_win_sysinternals_psexec_paexec_escalate_system.yml',
   'proc_creation_win_sysinternals_psexesvc_as_system.yml',
   'proc_creation_win_tasklist_basic_execution.yml'],
  'prov_data': [1837, 1827]},
 {'matched_value': '/app',
  'svc': 'cartservice',
  'matched_rules': ['proc_creation_lnx_process_discovery.yml',
   'proc_creation_macos_suspicious_applet_behaviour.yml',
   'proc_creation_win_sysinternals_psexec_paexec_escalate_system.yml',
   'proc_creation_win_sysinternals_psexesvc_as_system.yml',
   'proc_creation_win_tasklist_basic_execution.yml'],
  'prov_data': [1842, 1837]},
 {'matched_value': '/app',
  'svc': 'cartservice',
  'matched_rules': ['proc_creation_lnx_process_discovery.yml',
   'proc_creation_macos_suspicious_applet_behaviour.yml',
   'proc_creation_win_sysinternals_psexec_paexec_escal

In [21]:
# Get matched values for edges
edge_sigma_values = get_matched_sigma_values(edges_df, sigma_rules_values_to_names, edge_common_values)
print(edge_sigma_values)

['external', 'write', 'socket', 'RemoteThread', 'file_read']


# Create Alerts

### Create Alerts From Sigma Values for Artifacts and Processes

In [22]:
def extract_alert_data_for_svc(matches_list, data_type):
    # Create a defaultdict to store svc and its associated rules and related processes
    if data_type == 'Process':
        svc_dict = defaultdict(lambda: {'rules': set(), 'process_ids': set()})
    elif data_type == 'Artifact':
        svc_dict = defaultdict(lambda: {'rules': set(), 'ports': set()})
    # svc_alert_data = defaultdict(lambda: {'svc': '', 'rules': set(), 'process_ids': set()})

    for match_dict in matches_list:
        svc = match_dict['svc']
        matched_rules = match_dict['matched_rules']
        # prov_data = match_dict['prov_data']
        if data_type == 'Process':
            related_processes = match_dict['prov_data']
        elif data_type == 'Artifact':
            related_ports = match_dict['prov_data']

        # Add the associated_rule and related_process_ids to the corresponding sets in the defaultdict
        svc_dict[svc]['rules'].update(matched_rules)
        # svc_dict[svc]['prov_data'].update(prov_data)
        if data_type == 'Process':
            svc_dict[svc]['process_ids'].update(related_processes)
        elif data_type == 'Artifact':
            svc_dict[svc]['ports'].update(related_ports)

    # Convert the defaultdict to a list of dictionaries
    if data_type == 'Process':
        rows = [
            {'svc': svc_key, 
             'rules': list(rules_processes_dict['rules']), 
             'process_ids': list(rules_processes_dict['process_ids'])
             } for svc_key, rules_processes_dict in svc_dict.items()
        ]
    elif data_type == 'Artifact':
        rows = [
            {'svc': svc_key, 
             'rules': list(rules_ports_dict['rules']), 
             'ports': list(rules_ports_dict['ports'])
             } for svc_key, rules_ports_dict in svc_dict.items()
        ]

    # Convert the list of dictionaries to a dataframe
    if data_type == 'Process':
        column_names = ['svc', 'rules', 'process_ids']
    elif data_type == 'Artifact':
        column_names = ['svc', 'rules', 'ports']
        
    svc_data = pd.DataFrame(rows, columns=column_names)
    
    return svc_data

In [23]:
artifacts_alerts = extract_alert_data_for_svc(artifacts_matches, 'Artifact')
artifacts_alerts

Unnamed: 0,svc,rules,ports
0,redis-cart,[net_connection_lnx_back_connect_shell_dev.yml...,"[54112, 34050, 34054, 48936, 6379, 34030, 3403..."
1,frontend,"[win_security_rdp_localhost_login.yml, proc_cr...","[58508, 58510, 8080, 58512, 57236, 58522, 5584..."
2,paymentservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[36032, 50051, 37604, 36038, 45574, 51240, 455..."
3,cartservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[46368, 55106, 53666, 49254, 46376, 53678, 707..."
4,currencyservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[44000, 35042, 43688, 43704, 60528, 43992, 700..."
5,productcatalogservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[43390, 52324, 34350, 52334, 34356, 41176, 411..."
6,checkoutservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[36036, 37050, 37066, 51792, 51798, 5050, 36028]"
7,adservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[57410, 48840, 57420, 9555, 48824]"
8,external,"[image_load_dll_dbghelp_dbgcore_susp_load.yml,...",[None]


In [24]:
processes_alerts = extract_alert_data_for_svc(processes_matches, 'Process')
processes_alerts

Unnamed: 0,svc,rules,process_ids
0,cartservice,"[proc_creation_lnx_process_discovery.yml, proc...","[2433, 2434, 1923, 2435, 2436, 2437, 2438, 243..."
1,adservice,"[proc_creation_lnx_process_discovery.yml, proc...","[2307, 3331, 3335, 3338, 3339, 2324, 3107, 221..."
2,external,"[proc_creation_lnx_nohup_susp_execution.yml, p...","[1234, 9876, 9877, 9878]"
3,recommendationservice,"[posh_ps_susp_proxy_scripts.yml, create_stream...","[9879, 9880, 9881, 1723, 1724, 1725]"
4,frontend,"[proc_creation_macos_local_groups.yml, lnx_aud...","[None, 9877]"


### Add Edges to Alerts

#### Match Edge Data to Rule Values

In [25]:
edges_matches = []
matched_edges_ids = set()

for _, row in edges_df.iterrows():
    for _, col_value in row.items():
        if (pd.notna(col_value) and isinstance(col_value, str) and not col_value.isdigit() and
                    col_value not in edge_common_values):            
            
            matched_rules = []
            
            for rule_value in sigma_rules_values_to_names.keys():
                    if col_value in rule_value:
                        matched_rules.extend(sigma_rules_values_to_names[rule_value])
            
            if matched_rules:
                match = {
                    'matched_value': col_value,
                    'matched_rules': matched_rules,
                    'ids_list': [row['from'], row['to']]
                    # 'from_id': row['from'],
                    # 'to_id': row['to'],
                }
                edges_matches.append(match)
                matched_edges_ids.add(row['from'])
                matched_edges_ids.add(row['to'])
                
edges_matches

[{'matched_value': 'external',
  'matched_rules': ['azure_ad_guest_users_invited_to_tenant_by_non_approved_inviters.yml',
   'azure_guest_invite_failure.yml',
   'azure_identity_protection_inbox_forwarding_rule.yml',
   'microsoft365_susp_inbox_forwarding.yml',
   'net_dns_external_service_interaction_domains.yml',
   'zeek_dns_susp_zbit_flag.yml',
   'win_mssql_failed_logon_from_external_network.yml',
   'win_susp_ntlm_rdp.yml',
   'win_security_external_device.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'dns_query_win_susp_external_ip_lookup.yml',
   'net_connection_win_excel_outbound_network_connection.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'posh_ps_cor_profiler.yml',
   'proc_creation_win_bginfo_suspicious_child_process.yml',
   'proc_creation_win_bginfo_uncommon_child_process.yml',
   'proc_creation_win_odbcconf_uncommon_child_process.yml',
   'proc_creation_win_susp_appx_execution.yml',


#### Connect Edges to Artifacts and Processes

In [26]:
def edges_matches_to_prov_vertex_matches(edges_matches, prov_df, vertex_type):
    matches_list = []
    for edge_match in edges_matches:
        edge_ids = edge_match['ids_list']
        match_svc = list(prov_df.loc[prov_df['id'].isin(edge_ids), 'svc'].values)
        if vertex_type == 'Process':
            match_prov_data = prov_df.loc[prov_df['id'].isin(edge_ids), 'pid'].values + prov_df.loc[prov_df['id'].isin(edge_ids), 'ppid'].values
        elif vertex_type == 'Artifact':
            match_prov_data = prov_df.loc[prov_df['id'].isin(edge_ids), 'remote port'].values + prov_df.loc[prov_df['id'].isin(edge_ids), 'local port'].values
        match_prov_data = [int(item) for item in match_prov_data if pd.notna(item)]
        for svc in match_svc:
                
            process_match = {
                                'matched_value': edge_match['matched_value'],
                                'svc': svc,
                                'matched_rules': edge_match['matched_rules'],
                                'prov_data': match_prov_data
                            }
            matches_list.append(process_match)
    return matches_list

In [27]:
edges_artifacts_matches = edges_matches_to_prov_vertex_matches(edges_matches, artifacts_df, 'Artifact')
edges_artifacts_matches

[{'matched_value': 'external',
  'svc': 'frontend',
  'matched_rules': ['azure_ad_guest_users_invited_to_tenant_by_non_approved_inviters.yml',
   'azure_guest_invite_failure.yml',
   'azure_identity_protection_inbox_forwarding_rule.yml',
   'microsoft365_susp_inbox_forwarding.yml',
   'net_dns_external_service_interaction_domains.yml',
   'zeek_dns_susp_zbit_flag.yml',
   'win_mssql_failed_logon_from_external_network.yml',
   'win_susp_ntlm_rdp.yml',
   'win_security_external_device.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'dns_query_win_susp_external_ip_lookup.yml',
   'net_connection_win_excel_outbound_network_connection.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'posh_ps_cor_profiler.yml',
   'proc_creation_win_bginfo_suspicious_child_process.yml',
   'proc_creation_win_bginfo_uncommon_child_process.yml',
   'proc_creation_win_odbcconf_uncommon_child_process.yml',
   'proc_creation_win_susp_

In [28]:
edges_artifacts_alerts = extract_alert_data_for_svc(edges_artifacts_matches, 'Artifact')
edges_artifacts_alerts

Unnamed: 0,svc,rules,ports
0,frontend,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[54401, 65316]"
1,external,"[posh_ps_susp_proxy_scripts.yml, proc_creation...",[54401]
2,recommendationservice,"[azure_guest_invite_failure.yml, net_connectio...",[]


In [29]:
edges_processes_matches = edges_matches_to_prov_vertex_matches(edges_matches, processes_df, 'Process')
edges_processes_matches

[{'matched_value': 'external',
  'svc': 'frontend',
  'matched_rules': ['azure_ad_guest_users_invited_to_tenant_by_non_approved_inviters.yml',
   'azure_guest_invite_failure.yml',
   'azure_identity_protection_inbox_forwarding_rule.yml',
   'microsoft365_susp_inbox_forwarding.yml',
   'net_dns_external_service_interaction_domains.yml',
   'zeek_dns_susp_zbit_flag.yml',
   'win_mssql_failed_logon_from_external_network.yml',
   'win_susp_ntlm_rdp.yml',
   'win_security_external_device.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'dns_query_win_susp_external_ip_lookup.yml',
   'net_connection_win_excel_outbound_network_connection.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'net_connection_win_susp_external_ip_lookup.yml',
   'posh_ps_cor_profiler.yml',
   'proc_creation_win_bginfo_suspicious_child_process.yml',
   'proc_creation_win_bginfo_uncommon_child_process.yml',
   'proc_creation_win_odbcconf_uncommon_child_process.yml',
   'proc_creation_win_susp_

In [30]:
edges_processes_alerts = extract_alert_data_for_svc(edges_processes_matches, 'Process')
edges_processes_alerts

Unnamed: 0,svc,rules,process_ids
0,frontend,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, 3434, 19753, 3447]"
1,external,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, 19754, 19753, 3447]"
2,recommendationservice,"[azure_guest_invite_failure.yml, net_connectio...","[3449, 3447]"


In [31]:
def replace_nan_with_empty_list(x):
    if isinstance(x, list):
        return [i if i is not None else [] for i in x]
    else:
        return [] if pd.isnull(x) else x

In [32]:
def merge_and_union_alerts_dfs(prov_alerts_df, edges_alerts_df, union_prov_data_cols, merge_on_col='svc'):
    
    # Merge the dataframes
    merged_df = prov_alerts_df.merge(edges_alerts_df, on=merge_on_col, how='outer').applymap(replace_nan_with_empty_list)

    # Union the specified columns and drop the original columns
    union_cols = [('rules_x', 'rules_y'), (f'{union_prov_data_cols}_x', f'{union_prov_data_cols}_y')]
    for cols_rules, cols_prov_data in union_cols:
        merged_df[cols_rules[:-2]] = merged_df[[cols_rules, cols_prov_data]].apply(lambda x: list(set().union(*x)), axis=1)
        merged_df = merged_df.drop([cols_rules, cols_prov_data], axis=1)

    return merged_df

In [33]:
artifacts_with_edges_alerts = merge_and_union_alerts_dfs(artifacts_alerts, edges_artifacts_alerts, 'ports')
artifacts_with_edges_alerts

Unnamed: 0,svc,rules,ports
0,redis-cart,[net_connection_lnx_back_connect_shell_dev.yml...,"[54112, 34050, 34054, 48936, 6379, 34030, 4892..."
1,frontend,"[posh_ps_susp_proxy_scripts.yml, win_security_...","[54401, 58508, 58510, 8080, 58512, 57236, 5852..."
2,paymentservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[36032, 50051, 37604, 36038, 45574, 51240, 455..."
3,cartservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[46368, 55106, 53666, 49254, 46376, 53678, 551..."
4,currencyservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[44000, 35042, 43688, 60528, 7000, 43992, 4370..."
5,productcatalogservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[52324, 3550, 34350, 52334, 34356, 41176, 4118..."
6,checkoutservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[5050, 36036, 37066, 51792, 51798, 37050, 36028]"
7,adservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[57410, 48840, 57420, 9555, 48824]"
8,external,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[54401, None]"
9,recommendationservice,"[azure_guest_invite_failure.yml, net_connectio...",[]


In [34]:
processes_with_edges_alerts = merge_and_union_alerts_dfs(processes_alerts, edges_processes_alerts, 'process_ids')
processes_with_edges_alerts

Unnamed: 0,svc,rules,process_ids
0,cartservice,"[proc_creation_lnx_process_discovery.yml, proc...","[2433, 2434, 1923, 2436, 2435, 2437, 2438, 243..."
1,adservice,"[proc_creation_lnx_process_discovery.yml, proc...","[3331, 2307, 3335, 3338, 3339, 2324, 3107, 221..."
2,external,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, 19754, 19753, 1234, 9876, 9877, ..."
3,recommendationservice,"[posh_ps_susp_proxy_scripts.yml, create_stream...","[3449, 9879, 9880, 9881, 1723, 1724, 1725, 3447]"
4,frontend,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, None, 3434, 19753, 9877, 3447]"


# Final Alerts Data

In [35]:
alerts_df = artifacts_with_edges_alerts.merge(processes_with_edges_alerts, on='svc', how='outer').applymap(replace_nan_with_empty_list)
alerts_df

Unnamed: 0,svc,rules_x,ports,rules_y,process_ids
0,redis-cart,[net_connection_lnx_back_connect_shell_dev.yml...,"[54112, 34050, 34054, 48936, 6379, 34030, 4892...",[],[]
1,frontend,"[posh_ps_susp_proxy_scripts.yml, win_security_...","[54401, 58508, 58510, 8080, 58512, 57236, 5852...","[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, None, 3434, 19753, 9877, 3447]"
2,paymentservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[36032, 50051, 37604, 36038, 45574, 51240, 455...",[],[]
3,cartservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[46368, 55106, 53666, 49254, 46376, 53678, 551...","[proc_creation_lnx_process_discovery.yml, proc...","[2433, 2434, 1923, 2436, 2435, 2437, 2438, 243..."
4,currencyservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[44000, 35042, 43688, 60528, 7000, 43992, 4370...",[],[]
5,productcatalogservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[52324, 3550, 34350, 52334, 34356, 41176, 4118...",[],[]
6,checkoutservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[5050, 36036, 37066, 51792, 51798, 37050, 36028]",[],[]
7,adservice,"[win_security_rdp_localhost_login.yml, proc_cr...","[57410, 48840, 57420, 9555, 48824]","[proc_creation_lnx_process_discovery.yml, proc...","[3331, 2307, 3335, 3338, 3339, 2324, 3107, 221..."
8,external,"[posh_ps_susp_proxy_scripts.yml, proc_creation...","[54401, None]","[posh_ps_susp_proxy_scripts.yml, proc_creation...","[11110, 3401, 19754, 19753, 1234, 9876, 9877, ..."
9,recommendationservice,"[azure_guest_invite_failure.yml, net_connectio...",[],"[posh_ps_susp_proxy_scripts.yml, create_stream...","[3449, 9879, 9880, 9881, 1723, 1724, 1725, 3447]"


In [36]:
# Combine 'associated_rules' columns
alerts_df['rules'] = alerts_df[['rules_x', 'rules_y']].apply(lambda x: list(set().union(*x)), axis=1)
# Drop the original 'associated_rules' columns
alerts_df = alerts_df.drop(['rules_x', 'rules_y'], axis=1)
alerts_df

Unnamed: 0,svc,ports,process_ids,rules
0,redis-cart,"[54112, 34050, 34054, 48936, 6379, 34030, 4892...",[],[net_connection_lnx_back_connect_shell_dev.yml...
1,frontend,"[54401, 58508, 58510, 8080, 58512, 57236, 5852...","[11110, 3401, None, 3434, 19753, 9877, 3447]","[posh_ps_susp_proxy_scripts.yml, proc_creation..."
2,paymentservice,"[36032, 50051, 37604, 36038, 45574, 51240, 455...",[],"[win_security_rdp_localhost_login.yml, proc_cr..."
3,cartservice,"[46368, 55106, 53666, 49254, 46376, 53678, 551...","[2433, 2434, 1923, 2436, 2435, 2437, 2438, 243...","[win_security_rdp_localhost_login.yml, proc_cr..."
4,currencyservice,"[44000, 35042, 43688, 60528, 7000, 43992, 4370...",[],"[win_security_rdp_localhost_login.yml, proc_cr..."
5,productcatalogservice,"[52324, 3550, 34350, 52334, 34356, 41176, 4118...",[],"[win_security_rdp_localhost_login.yml, proc_cr..."
6,checkoutservice,"[5050, 36036, 37066, 51792, 51798, 37050, 36028]",[],"[win_security_rdp_localhost_login.yml, proc_cr..."
7,adservice,"[57410, 48840, 57420, 9555, 48824]","[3331, 2307, 3335, 3338, 3339, 2324, 3107, 221...","[win_security_rdp_localhost_login.yml, proc_cr..."
8,external,"[54401, None]","[11110, 3401, 19754, 19753, 1234, 9876, 9877, ...","[posh_ps_susp_proxy_scripts.yml, proc_creation..."
9,recommendationservice,[],"[3449, 9879, 9880, 9881, 1723, 1724, 1725, 3447]","[posh_ps_susp_proxy_scripts.yml, create_stream..."


In [None]:
# Convert 'alerts' column to lists of strings
# alerts_df['alerts'] = alerts_df['alerts'].apply(lambda x: [str(i) for i in x])

# Save the alerts dataframe to a CSV file
alerts_df.to_csv('alerts_svc_to_rules_with_prov_data.csv', index=False)

# Lateral Movement Alerts

In [None]:
# Check if alerts_df 'alerts' column contains any lateral movement rules list
alerts_df['rules'].apply(lambda x: any(rule in x for rule in lm_rules_by_tags))