Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SiLK IPSet output support #122

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 24 additions & 1 deletion README.md
Expand Up @@ -86,11 +86,17 @@ Installation on Unix and Unix-like systems is straightforward. Either clone the
sudo apt-get install python-dev python-pip python-virtualenv git
git clone https://github.com/mlsecproject/combine.git
cd combine
virtualenv venv
virtualenv --system-site-packages venv
source venv/bin/activate
pip install -r requirements.txt
```

On RHEL/CentOS, use the following line instead of the apt-get line:

```
sudo yum install python-devel python-pip python-virtualenv git gcc
```

At this point you should be ready to run Combine.

We also have a [dockerfile](https://github.com/mlsecproject/combine/tree/master/docker) available.
Expand All @@ -113,6 +119,23 @@ set as `medium` throughout the export by default.

Thanks to [@paulpc](https://github.com/paulpc) for implementing this feature and [@mgoffin](https://github.com/mgoffin) for moral support ;).

### Exporting to SiLK

In order to use the [SiLK](http://tools.netsa.cert.org/silk/silk.html) exporting function, a single
configuration item is necessary in the Baler section of the configuration file. Make sure you configure
the following entry correctly:

```
silk_output_path = /path/to/silk/ipsets
```

Each dataset that is collected will be created in the configured directory as a unique ipset file with
the naming convention:

```
<source host>_<list name>_<direction>.set
```

### Copyright Info

Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
67 changes: 65 additions & 2 deletions baler.py
Expand Up @@ -198,6 +198,69 @@ def bale_CRITs(harvest, filename):
(total_iocs, maxThreads, time.time() - start_time))


def bale_silk(harvest, output_file):
    """Output the IPv4 indicators in harvest as SiLK IPSet files.

    One IPSet is built per (source host, list name, direction) combination
    and saved as ``<source host>_<list name>_<direction>.set`` inside the
    directory named by ``silk_output_path`` in the [Baler] section of
    combine.cfg.

    harvest: iterable of indicators, indexed as [0] value, [1] type,
        [2] direction and [3] reference URL.
    output_file: unused here; accepted so the function can be called the
        same way as the other output formats.

    Returns None. If combine.cfg cannot be read, the problem is logged and
    nothing is written.

    Raises ImportError if IPSet cannot be imported from the silk package,
    and ValueError if silk_output_path is missing from combine.cfg.
    """
    try:
        from silk import IPSet
    except ImportError:
        # Only import failures are translated; Ctrl-C and friends propagate.
        raise ImportError('Failed to import IPSet from silk package. You probably need to install silk or reconfigure virtualenv with: --system-site-packages')

    config = ConfigParser.SafeConfigParser()
    if not config.read('combine.cfg'):
        logger.error('bale_SiLK: Could not read combine.cfg.\n')
        logger.error('HINT: edit combine-example.cfg and save as combine.cfg.\n')
        return

    if not config.has_option('Baler', 'silk_output_path'):
        # Raising a plain string is itself a TypeError on Python 2.6+, which
        # would hide this message; raise a real exception instead.
        raise ValueError('Please check the combine.cfg file for the silk_output_path field in the [Baler] section')
    path = config.get('Baler', 'silk_output_path')

    # Ugly regex to parse out source host and filename
    # might be nice to rewrite the URL lists as config files, e.g.
    # [source_name]
    # url = <url>
    # direction = <inbound|outbound|bidirectional>
    # confidence = <rating>
    #
    # group(1): everything between "://" and the first "/" (the host).
    # group(2): the last path component up to its first ".", ignoring any
    #           query string or fragment. The directory prefix is consumed
    #           greedily so that two lists living in the same directory on
    #           one host do not end up in the same IPSet.
    source_pattern = re.compile(r'(?:https?|file)://(.*?)/(?:[^?#]*/)?([^/.?#]*)')

    ipsets = {}
    for indicator in harvest:
        if indicator[1] == 'FQDN':
            # Not dealing with FQDNs
            continue
        if indicator[1] != 'IPv4':
            logger.info("don't yet know what to do with: %s[%s]" % (indicator[1], indicator[0]))
            continue

        # getting the source automatically:
        source_match = source_pattern.match(indicator[3])
        if not source_match:
            logger.info("can't determine source from ref (%s) so don't know what to do with: %s[%s]" % (indicator[3], indicator[1], indicator[0]))
            continue

        # Generate an IPSet name based on source and direction - one set per
        # direction, per source.
        setname = str(source_match.group(1) + "_" + source_match.group(2) + "_" + indicator[2])
        if setname not in ipsets:
            ipsets[setname] = IPSet()
        ipsets[setname].add(indicator[0])

    for setname, ipset in ipsets.items():
        outfile = os.path.join(path, setname + ".set")
        logger.info("saving IPSet to: %s" % outfile)
        if os.path.isfile(outfile):
            # An older copy exists: write the new set next to it and rename
            # it over the old file rather than saving onto it directly.
            # NOTE(review): presumably IPSet.save() will not overwrite an
            # existing file - confirm against the PySiLK documentation.
            tmpfile = outfile + ".tmp"
            ipset.save(tmpfile)
            os.rename(tmpfile, outfile)
        else:
            ipset.save(outfile)


def bale(input_file, output_file, output_format, is_regular):
config = ConfigParser.SafeConfigParser()
cfg_success = config.read('combine.cfg')
Expand All @@ -212,9 +275,9 @@ def bale(input_file, output_file, output_format, is_regular):

# TODO: also need plugins here (cf. #23)
if is_regular:
format_funcs = {'csv': bale_reg_csv, 'crits': bale_CRITs}
format_funcs = {'csv': bale_reg_csv, 'crits': bale_CRITs, 'silk': bale_silk}
else:
format_funcs = {'csv': bale_enr_csv, 'crits': bale_CRITs}
format_funcs = {'csv': bale_enr_csv, 'crits': bale_CRITs, 'silk': bale_silk}
format_funcs[output_format](harvest, output_file)

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions combine-example.cfg
Expand Up @@ -16,3 +16,4 @@ crits_username = CRITS_USERNAME
crits_api_key = CRITS_API_KEY
crits_campaign = combine
crits_maxThreads = 10
silk_output_path = /path/to/silk/ipsets
4 changes: 2 additions & 2 deletions combine.py
Expand Up @@ -15,14 +15,14 @@
logger = get_logger()

parser = argparse.ArgumentParser()
parser.add_argument('-t', '--type', help="Specify output type. Currently supported: CSV and exporting to CRITs")
parser.add_argument('-t', '--type', help="Specify output type. Currently supported: CSV, SiLK IPSets and exporting to CRITs")
parser.add_argument('-f', '--file', help="Specify output file. Defaults to harvest.FILETYPE")
parser.add_argument('-d', '--delete', help="Delete intermediate files", action="store_true")
parser.add_argument('-e', '--enrich', help="Enrich data", action="store_true")
parser.add_argument('--tiq-test', help="Output in tiq-test format", action="store_true")
args = parser.parse_args()

possible_types = ['csv', 'json','crits']
possible_types = ['csv', 'json','crits', 'silk']

if not args.type:
out_type = 'csv'
Expand Down