## Entity - Regexes
- Account - SID, email, NTAcct
- Host - DNS
- IP address - IP address (v4, v6)
- URL - URL
- Azure resource - ResourceID
- Registry key - Registry Key
- Domain name (DNS) - DNS
- File - file path
- File hash - MD5, SHA1, SHA256

In [53]:
# Previously defined regexes, assembled from commented fragments.
# Adjacent raw-string literals are concatenated by Python, so each resulting
# pattern is a single string.

# NT account: <name>\<name>, each side 2-15 characters excluding / : * ? " < > |
ntacct_rgx = (
    r"^[^\/:*?\"<>|]{2,15}"
    r"\\"
    r"[^\/:*?\"<>|]{2,15}$"
)

# Azure resource ID: /subscriptions/<id>/resourcegroups/<anything>
resourceId_rgx = (
    r"^(\/subscriptions\/)"
    r"[^/]*"
    r"(\/resourcegroups\/)"
    r".*$"
)

# Email address: local part, "@", one or more dotted labels, final label >= 2 chars
email_rgx = (
    r"^[\w\d._%+-]+"
    r"@"
    r"(?:[\w\d-]+\.)+"
    r"[\w]{2,}$"
)

In [54]:
import json

def writeToJSONFile(data, path='./', fileName='regexes'):
    """Serialize ``data`` as JSON to ``<path>/<fileName>.json``.

    Any existing file at that location is overwritten.

    Args:
        data: JSON-serializable object to write.
        path: Directory that receives the file; may be relative or absolute.
        fileName: File name without the ``.json`` extension.

    Raises:
        OSError: If the target file cannot be opened for writing.
        TypeError: If ``data`` is not JSON serializable.
    """
    import os  # local import keeps this definition self-contained

    # Join the parts instead of prefixing './' by hand: the old concatenation
    # turned an absolute ``path`` into a location relative to the working
    # directory. Relative paths resolve to the same file as before.
    filePathNameWExt = os.path.join(path, fileName + '.json')
    with open(filePathNameWExt, 'w') as fp:
        json.dump(data, fp)
        
# Regexes from the IoCExtract library, wrapped in ^...$ anchors.
#
# NOTE: the patterns containing backslashes are written in escaped form
# ('\\.' for the regex token \. and '\n' for a line break inside the
# multi-line patterns), so they must be ordinary (non-raw) string literals.
# With an r prefix every '\\' stays as two backslashes and the regex then
# matches a literal backslash (e.g. the IPv4 pattern would reject "1.2.3.4").
# The hash patterns contain no backslashes, so their r prefix is harmless.
#
# NOTE(review): the multi-line patterns embed newlines and indentation, so
# they presumably have to be compiled with re.VERBOSE by the consumer - confirm.
data = {'DNS_REGEX': {'regex': '^((?=[a-z0-9-]{1,63}\\.)[a-z0-9]+(-[a-z0-9]+)*\\.){1,126}[a-z]{2,63}$', 'priority': '1', 'entity': 'host'},
        'IPV4_REGEX': {'regex': '^(?P<ipaddress>(?:[0-9]{1,3}\\.){3}[0-9]{1,3})$', 'priority': '0', 'entity': 'ipaddress'},
        'IPV6_REGEX': {'regex': '^(?<![:.\\w])(?:[A-F0-9]{0,4}:){2,7}[A-F0-9]{0,4}(?![:.\\w])$', 'priority': '0', 'entity': 'ipaddress'},
        'URL_REGEX': {'regex': '^\n            (?P<protocol>(https?|ftp|telnet|ldap|file)://)\n            (?P<userinfo>([a-z0-9-._~!$&\\\'()*+,;=:]|%[0-9A-F]{2})*@)?\n            (?P<host>([a-z0-9-._~!$&\\\'()*+,;=]|%[0-9A-F]{2})*)\n            (:(?P<port>\\d*))?\n            (/(?P<path>([^?\\#"<>\\s]|%[0-9A-F]{2})*/?))?\n            (\\?(?P<query>([a-z0-9-._~!$&\'()*+,;=:/?@]|%[0-9A-F]{2})*))?\n            (\\#(?P<fragment>([a-z0-9-._~!$&\'()*+,;=:/?@]|%[0-9A-F]{2})*))?$', 'priority': '0', 'entity': 'url'},
        'MD5_REGEX': {'regex': r'^(?:^|[^A-Fa-f0-9])(?P<hash>[A-Fa-f0-9]{32})(?:$|[^A-Fa-f0-9])$', 'priority': '1', 'entity': 'hash'},
        'SHA1_REGEX': {'regex': r'^(?:^|[^A-Fa-f0-9])(?P<hash>[A-Fa-f0-9]{40})(?:$|[^A-Fa-f0-9])$', 'priority': '1', 'entity': 'hash'},
        'SHA256_REGEX': {'regex': r'^(?:^|[^A-Fa-f0-9])(?P<hash>[A-Fa-f0-9]{64})(?:$|[^A-Fa-f0-9])$', 'priority': '1', 'entity': 'hash'},
        'LXPATH_REGEX': {'regex': '^(?P<root>/+||[.]+)\n            (?P<folder>/(?:[^\\\\/:*?<>|\\r\\n]+/)*)\n            (?P<file>[^/\\0<>|\\r\\n ]+)$', 'priority': '2', 'entity': 'file'},
        'WINPATH_REGEX': {'regex': '^\n            (?P<root>[a-z]:|\\\\\\\\[a-z0-9_.$-]+||[.]+)\n            (?P<folder>\\\\(?:[^\\/:*?"\\\'<>|\\r\\n]+\\\\)*)\n            (?P<file>[^\\\\/*?""<>|\\r\\n ]+)$', 'priority': '1', 'entity': 'file'}}

# Persist the initial pattern set (default target: ./regexes.json).
writeToJSONFile(data)

In [55]:
def appendToJSONFile(data, fileName='regexes.json'):
    """Merge the entries of ``data`` into the JSON object stored in ``fileName``.

    The file's current top-level object is loaded, updated with ``data``
    (entries in ``data`` win on key clashes) and written back. A missing file
    is treated as an empty object. Passing the full, already-merged dict - as
    the notebook cells do - produces the same file as a plain overwrite.

    Args:
        data: Mapping of entries to add or replace.
        fileName: Path of the JSON file to update.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON; the file
            is left untouched in that case rather than being clobbered.
    """
    try:
        with open(fileName, encoding='utf-8') as f:
            merged = json.load(f)
    except FileNotFoundError:
        # Nothing to append to yet: start from an empty object.
        merged = {}

    # Previously the file was opened with 'w' and replaced by ``data`` alone,
    # so calling this with only the new entries silently dropped the old ones.
    merged.update(data)

    with open(fileName, 'w', encoding='utf-8') as f:
        json.dump(merged, f)
        
# Load the saved pattern set, add the hand-written email / Azure resource ID /
# NT account patterns, then write the combined set back to regexes.json.
with open('regexes.json') as regex_file:
    data = json.load(regex_file)

data.update({
    'EMAIL_REGEX': {
        'regex': email_rgx,
        'priority': '0',
        'entity': 'account',
    },
    'RESOURCEID_REGEX': {
        'regex': resourceId_rgx,
        'priority': '0',
        'entity': 'azureresource',
    },
    'NTACCT_REGEX': {
        'regex': ntacct_rgx,
        'priority': '0',
        'entity': 'account',
    },
})

appendToJSONFile(data)

In [56]:
# SID (account security identifier) regex
# Layout: S-<revision>-<identifier authority>-<sub-authority>[-<sub-authority>...]
# Examples: S-1-5-18
#           S-1-5-21-3623811015-3361044348-30300820-1013
#
# The earlier pattern accepted exactly one sub-authority, so it matched
# S-1-5-18 but rejected domain account SIDs, which carry several. A SID holds
# between 1 and 15 sub-authorities; every string the old pattern matched
# still matches.

sid_rgx = r"^S-\d+-\d+(?:-\d+){1,15}$"

# Load the saved pattern set, add the SID entry and write everything back.
with open('regexes.json') as json_file:
    data = json.load(json_file)

data.update({'SID_REGEX': {'regex': sid_rgx, 'priority': '1', 'entity': 'account'}})

appendToJSONFile(data)

In [57]:
# Registry key regex
# Example: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\Nls
#
# Structure: optional leading quote/whitespace, the hive (short or HKEY_* name),
# one or more backslash-prefixed key segments with an optional trailing
# backslash, then an optional closing quote/whitespace. The pattern carries no
# ^/$ anchors.

regkey_rgx = r"""("|'|\s)?(?P<hive>HKLM|HKCU|HKCR|HKU|HKEY_(LOCAL_MACHINE|USERS|CURRENT_USER|CURRENT_CONFIG|CLASSES_ROOT))(?P<key>(\\[^"'\\/]+){1,}\\?)("|'|\s)?"""

# Load the saved pattern set, add the registry key entry and write it back.
with open('regexes.json') as regex_file:
    data = json.load(regex_file)

data.update({
    'REGKEY_REGEX': {
        'regex': regkey_rgx,
        'priority': '1',
        'entity': 'registrykey',
    },
})

appendToJSONFile(data)