In [240]:
# Default
import os
from pprint import pprint
import pandas as pd

# OTX API
from OTXv2 import OTXv2
from OTXv2 import IndicatorTypes


In [320]:
# Initialize the OTXv2 API
otx = OTXv2(os.environ['OTXv2'])

# Get all the pulses for the specief query
ta505_vault = otx.search_pulses(query='TA505')

In [242]:
# Template for categorizing indicators
vault_types_505 = {
    'ipv4': [],
    'hostname': [],
    'domain': [],
    'url': [],
    'filehash-sha256': [],
    'filehas-sha1': [],
    'filehash-md5': [],
    'yara': []
}

# Fill in the template based on the indicator type
for pulses in ta505_vault['results']:
    for indicator in pulses['indicators']:
        if 'ipv4' in indicator['type'].lower():
            vault_types_505['ipv4'].append(indicator)
        elif 'hostname' in indicator['type'].lower():
            vault_types_505['hostname'].append(indicator)
        elif 'domain' in indicator['type'].lower():
            vault_types_505['domain'].append(indicator)        
        elif 'url' in indicator['type'].lower():
            vault_types_505['url'].append(indicator)
        elif 'filehash-sha256' in indicator['type'].lower():
            vault_types_505['filehash-sha256'].append(indicator)
        elif 'filehas-sha1' in indicator['type'].lower():
            vault_types_505['filehas-sha1'].append(indicator)
        elif 'yara' in indicator['type'].lower():
            vault_types_505['yara'].append(indicator)
        elif 'filehash-md5' in indicator['type'].lower():
            vault_types_505['filehash-md5'].append(indicator)

In [243]:
# Load the data from local machine (unfortunately the .csv's cannot be shared)
maastricht_df = []
maastricht_df.append(pd.read_csv('data/samples_2020-09-28T04_55_22Z-TA505.csv'))
maastricht_df.append(pd.read_csv('data/samples_2020-09-28T04_55_36Z-TA505.csv'))
maastricht_df.append(pd.read_csv('data/samples_2020-09-28T04_56_06Z-TA505.csv'))

In [244]:
# Template for categorizing indicators
maastricht_types = {
    'ipv4': [],
    'hostname': [],
    'domain': [],
    'url': [],
    'filehash-sha256': [],
    'filehas-sha1': [],
    'yara': []
}
# Fill in the template based on the indicator type - only the url and domain available. 
for df in maastricht_df: 
    maastricht_types['url'].extend(df[df['Type'].str.contains('URL')]['Value'].values)
    maastricht_types['domain'].extend(df[df['Type'].str.contains('HEADER_FROM')]['Value'].values)

In [287]:
# Make TA505 data from Delft in similar format with AlientVault data
maastricht_df_cleaned = [] 
for maastricht_hostname in maastricht_types['domain']:
    _char_init = maastricht_hostname.find('@') + 1
    _char_end = maastricht_hostname.find('>')
    maastricht_df_cleaned.append(maastricht_hostname[_char_init:_char_end])

In [289]:
# Get similar domain names.
_similar_505 = []
for maastricht_hostname in maastricht_df_cleaned:
    for vault_url in vault_types_505['domain']:
        if maastricht_hostname.lower() in vault_url['indicator'].lower():
            _similar_505.append(vault_url['indicator'])

In [291]:
print(f"Total 'Domain' based TA505 IOCs from AlienVault: {len(vault_types_505['domain'])}")
print(f"Total 'Domain' based TA505 IOCs from Delft: {len(maastricht_df_cleaned)}")
print(f"Number of Similar IOCs: {len(_similar_505)}")
print(_similar_505)

Total 'Domain' based TA505 IOCs from AlienVault: 328
Total 'Domain' based TA505 IOCs from Delft: 530
Number of Similar IOCs: 0
[]


In [292]:
# Get similar urls. 
_similar_505 = []
for maastricht_hostname in maastricht_types['url']:
    for vault_url in vault_types_505['url']:
        if maastricht_hostname.lower() in vault_url['indicator'].lower():
            _similar_505.append(vault_url['indicator'])

In [293]:
print(f"Total 'url' based TA505 IOCs from AlienVault: {len(vault_types_505['url'])}")
print(f"Total 'url' based TA505 IOCs from Delft: {len(maastricht_types['url'])}")
print(f"Number of Similar IOCs: {len(_similar_505)}")
print(_similar_505)

Total 'url' based TA505 IOCs from AlienVault: 419
Total 'url' based TA505 IOCs from Delft: 1198
Number of Similar IOCs: 0
[]


In [300]:
# Get all the pulses for the specief query
ta542_vault = otx.search_pulses(query='TA542')


vault_types_542 = {
    'ipv4': [],
    'hostname': [],
    'domain': [],
    'url': [],
    'filehash-sha256': [],
    'filehas-sha1': [],
    'filehash-md5': [],
    'yara': []
}

# Fill in the template based on the indicator type
for pulses in ta542_vault['results']:
    for indicator in pulses['indicators']:
        if 'ipv4' in indicator['type'].lower():
            vault_types_542['ipv4'].append(indicator)
        elif 'hostname' in indicator['type'].lower():
            vault_types_542['hostname'].append(indicator)
        elif 'domain' in indicator['type'].lower():
            vault_types_542['domain'].append(indicator)        
        elif 'url' in indicator['type'].lower():
            vault_types_542['url'].append(indicator)
        elif 'filehash-sha256' in indicator['type'].lower():
            vault_types_542['filehash-sha256'].append(indicator)
        elif 'filehas-sha1' in indicator['type'].lower():
            vault_types_542['filehas-sha1'].append(indicator)
        elif 'yara' in indicator['type'].lower():
            vault_types_542['yara'].append(indicator)
        elif 'filehash-md5' in indicator['type'].lower():
            vault_types_542['filehash-md5'].append(indicator)

In [301]:
# Read dataset for TA542
delft_df = []
# Load the data from local machine (unfortunately the .csv's cannot be shared)
delft_df.append(pd.read_csv('data/samples_2020-09-28T04_57_10Z-TA542 (Emotet).csv'))

In [310]:
delft_types = {
    'ipv4': [],
    'hostname': [],
    'domain': [],
    'url': [],
    'filehash-sha256': [],
    'filehas-sha1': [],
    'yara': []
}


for df in delft_df: 
    delft_types['url'].extend(df[df['Type'].str.contains('URL')]['Value'].values)
    delft_types['domain'].extend(df[df['Type'].str.contains('HEADER_FROM')]['Value'].values)

In [311]:
# Make TA542 data from Delft in similar format with AlientVault data
delft_df_cleaned = [] 
for delft_hostname in delft_types['domain']:
    _char_init = delft_hostname.find('@') + 1
    _char_end = delft_hostname.find('>')
    delft_df_cleaned.append(delft_hostname[_char_init:_char_end])

In [319]:
_similar_hosts = []
for hostname_delft in delft_df_cleaned:
    for hostname_vault in vault_types_505['hostname']:
        if hostname_delft.lower() in hostname_vault['indicator'].lower():
            if hostname_vault['indicator'].lower() not in _similar_hosts:
                _similar_hosts.append(hostname_vault['indicator'])
                break

print(f"Total 'domain' based TA505 IOCs from AlienVault {len(vault_types_505['url'])}")
print(f"Total 'domain' based TA542 IOCs from Delft: {len(delft_df_cleaned)}")
print(f"Number of Similar IOCs: {len(_similar_hosts)}")
print(_similar_hosts)

Total 'domain' based TA505 IOCs from AlienVault 419
Total 'domain' based TA542 IOCs from Delft: 79
Number of Similar IOCs: 2
['www.chimachinenow.com', 'www.highlandfamily.org']
