work in progress for idaholab#463, allowing custom tags on the malcolm side
mmguero committed May 20, 2024
1 parent 56d5cb8 commit 6f972c8
Showing 15 changed files with 199 additions and 28 deletions.
16 changes: 11 additions & 5 deletions Dockerfiles/filebeat.Dockerfile
@@ -66,6 +66,8 @@ ENV SUPERCRONIC_VERSION "0.2.29"
ENV SUPERCRONIC_URL "https://github.com/aptible/supercronic/releases/download/v$SUPERCRONIC_VERSION/supercronic-linux-"
ENV SUPERCRONIC_CRONTAB "/etc/crontab"

ENV YQ_VERSION "4.44.1"
ENV YQ_URL "https://github.com/mikefarah/yq/releases/download/v${YQ_VERSION}/yq_linux_"

USER root

@@ -80,6 +82,8 @@ RUN export BINARCH=$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
gzip \
inotify-tools \
lzma \
jq \
jo \
openssl \
p7zip \
p7zip-full \
@@ -96,26 +100,28 @@ RUN export BINARCH=$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
python3 -m pip install --no-compile --no-cache-dir patool entrypoint2 pyunpack python-magic ordered-set supervisor watchdog && \
curl -fsSL -o /usr/local/bin/supercronic "${SUPERCRONIC_URL}${BINARCH}" && \
chmod +x /usr/local/bin/supercronic && \
curl -fsSL -o /usr/local/bin/yq "${YQ_URL}${BINARCH}" && \
chmod 755 /usr/local/bin/yq && \
apt-get -y -q --allow-downgrades --allow-remove-essential --allow-change-held-packages autoremove && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

COPY --chmod=755 shared/bin/docker-uid-gid-setup.sh /usr/local/bin/
COPY --chmod=755 shared/bin/service_check_passthrough.sh /usr/local/bin/
COPY --from=ghcr.io/mmguero-dev/gostatic --chmod=755 /goStatic /usr/bin/goStatic
ADD filebeat/filebeat.yml /usr/share/filebeat/filebeat.yml
ADD filebeat/filebeat-logs.yml /usr/share/filebeat-logs/filebeat-logs.yml
ADD filebeat/filebeat-nginx.yml /usr/share/filebeat-nginx/filebeat-nginx.yml
ADD filebeat/filebeat-tcp.yml /usr/share/filebeat-tcp/filebeat-tcp.yml
ADD filebeat/scripts /usr/local/bin/
ADD scripts/malcolm_utils.py /usr/local/bin/
ADD shared/bin/watch_common.py /usr/local/bin/
ADD shared/bin/opensearch_status.sh /usr/local/bin/
ADD filebeat/supervisord.conf /etc/supervisord.conf
RUN for INPUT in nginx tcp; do \
RUN for INPUT in logs nginx tcp; do \
mkdir -p /usr/share/filebeat-$INPUT/data; \
chown -R root:${PGROUP} /usr/share/filebeat-$INPUT; \
cp -a /usr/share/filebeat/module /usr/share/filebeat-$INPUT/module; \
chmod 750 /usr/share/filebeat-$INPUT; \
chmod 770 /usr/share/filebeat-$INPUT; \
chmod 770 /usr/share/filebeat-$INPUT/data; \
done; \
chmod 755 /usr/local/bin/*.sh /usr/local/bin/*.py && \
@@ -153,11 +159,11 @@ ENV FILEBEAT_TCP_PARSE_SOURCE_FIELD $FILEBEAT_TCP_PARSE_SOURCE_FIELD
ENV FILEBEAT_TCP_PARSE_TARGET_FIELD $FILEBEAT_TCP_PARSE_TARGET_FIELD
ENV FILEBEAT_TCP_PARSE_DROP_FIELD $FILEBEAT_TCP_PARSE_DROP_FIELD
ENV FILEBEAT_TCP_TAG $FILEBEAT_TCP_TAG
ENV FILEBEAT_REGISTRY_FILE "/usr/share/filebeat/data/registry/filebeat/data.json"
ENV FILEBEAT_REGISTRY_FILE "/usr/share/filebeat-logs/data/registry/filebeat/data.json"
ENV FILEBEAT_ZEEK_DIR "/zeek/"
ENV PCAP_NODE_NAME $PCAP_NODE_NAME

VOLUME ["/usr/share/filebeat/data", "/usr/share/filebeat-nginx/data", "/usr/share/filebeat-tcp/data"]
VOLUME ["/usr/share/filebeat-logs/data", "/usr/share/filebeat-nginx/data", "/usr/share/filebeat-tcp/data"]

ENTRYPOINT ["/usr/bin/tini", \
"--", \
11 changes: 10 additions & 1 deletion arkime/scripts/live_capture.sh
@@ -47,6 +47,15 @@ fi
# download and/or update geo updates
$ARKIME_DIR/bin/arkime_update_geo.sh

# calculate tags
TAGS_ARGS=()
if [[ -n "${EXTRA_TAGS}" ]]; then
while read EXTRA_TAG; do
TAGS_ARGS+=( -t )
TAGS_ARGS+=( "${EXTRA_TAG}" )
done < <(echo "${EXTRA_TAGS}" | tr ',' '\n') # loop over ',' separated EXTRA_TAGS values
fi
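As an aside, a quick sketch of how the loop above expands a hypothetical EXTRA_TAGS value into repeated -t arguments for Arkime capture (the tag names are invented for illustration):

EXTRA_TAGS="site-1,lab"            # hypothetical example value
TAGS_ARGS=()
while read EXTRA_TAG; do
  TAGS_ARGS+=( -t "${EXTRA_TAG}" )
done < <(echo "${EXTRA_TAGS}" | tr ',' '\n')
echo "${TAGS_ARGS[@]}"             # prints: -t site-1 -t lab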

# we haven't dropUser/dropGroup'ed yet, so make sure the regular user owns the files we just touched
[[ -n ${PUID} ]] && [[ -n ${PGID} ]] && chown -f -R ${PUID}:${PGID} "${ARKIME_DIR}"/etc/ || true

@@ -65,7 +74,7 @@ touch /var/run/arkime/initialized
echo "Arkime is initialized!"
echo

"${ARKIME_DIR}"/bin/capture --insecure "${NODE_ARGS[@]}" \
"${ARKIME_DIR}"/bin/capture --insecure "${TAGS_ARGS[@]}" "${NODE_ARGS[@]}" \
-c "${ARKIME_DIR}"/etc/config.ini \
-o pcapDir=/data/pcap/arkime-live \
-o dropUser=${PUSER} \
1 change: 1 addition & 0 deletions arkime/supervisord.conf
@@ -69,6 +69,7 @@ command=python3 /opt/pcap_arkime_processor.py %(ENV_PCAP_PIPELINE_VERBOSITY)s
--arkime "%(ENV_ARKIME_DIR)s"/bin/capture-offline
--autoarkime "%(ENV_ARKIME_AUTO_ANALYZE_PCAP_FILES)s"
--forcearkime "%(ENV_ARKIME_ROTATED_PCAP)s"
--extra-tags "%(ENV_EXTRA_TAGS)s"
--autotag "%(ENV_AUTO_TAG)s"
--managed "%(ENV_MANAGE_PCAP_FILES)s"
startsecs=15
2 changes: 1 addition & 1 deletion config/filebeat.env.example
@@ -33,4 +33,4 @@ FILEBEAT_TCP_PARSE_TARGET_FIELD=miscbeat
# Name of field to drop (if it exists) in events sent to the filebeat TCP input listener
FILEBEAT_TCP_PARSE_DROP_FIELD=message
# Tag to append to events sent to the filebeat TCP input listener
FILEBEAT_TCP_TAG=_malcolm_beats
FILEBEAT_TCP_TAG=_malcolm_beats
4 changes: 3 additions & 1 deletion config/upload-common.env.example
@@ -1,3 +1,5 @@
# Comma-separated list of tags for data generated by Malcolm
EXTRA_TAGS=
# Whether or not to automatically apply tags (based on the PCAP filename) to network traffic metadata
# parsed from uploaded PCAP files
AUTO_TAG=true
@@ -14,4 +16,4 @@ PCAP_PIPELINE_POLLING=false
PCAP_PIPELINE_POLLING_ASSUME_CLOSED_SEC=10
# 'pcap-monitor' to match the name of the container providing the uploaded/captured PCAP file
# monitoring service
PCAP_MONITOR_HOST=pcap-monitor
PCAP_MONITOR_HOST=pcap-monitor
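For reference, a hypothetical filled-in value for the EXTRA_TAGS setting added above might look like the following (tag names invented for illustration); each comma-separated value becomes a tag on data generated by Malcolm:

EXTRA_TAGS=site-1,lab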
4 changes: 2 additions & 2 deletions docs/contributing-logstash.md
@@ -18,15 +18,15 @@ $ grep -P "^( - ./zeek-logs| [\w-]+:)" docker-compose.yml | grep -B1 "zeek

Access to the `cooltool` logs must be provided in a similar fashion.

Next, tweak [`filebeat.yml`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/filebeat/filebeat.yml) by adding a new log input path pointing to the `cooltool` logs to send them along to the `logstash` container. This modified `filebeat.yml` will need to be reflected in the `filebeat` container via [bind mount](contributing-local-modifications.md#Bind) or by [rebuilding](development.md#Build) it.
Next, tweak [`filebeat-logs.yml`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/filebeat/filebeat-logs.yml) by adding a new log input path pointing to the `cooltool` logs to send them along to the `logstash` container. This modified `filebeat-logs.yml` will need to be reflected in the `filebeat` container via [bind mount](contributing-local-modifications.md#Bind) or by [rebuilding](development.md#Build) it.

Logstash can then be easily extended to add more [`logstash/pipelines`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/logstash/pipelines). At the time of this writing (as of the [v5.0.0 release]({{ site.github.repository_url }}/releases/tag/v5.0.0)), the Logstash pipelines basically look like this:

* input (from `filebeat`) sends logs to 1..*n* **parse pipelines**
* each **parse pipeline** does what it needs to do to parse its logs then sends them to the [**enrichment pipeline**](#LogstashEnrichments)
* the [**enrichment pipeline**]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/logstash/pipelines/enrichment) performs common lookups to the fields that have been normalized and indexes the logs into the OpenSearch data store

In order to add a new **parse pipeline** for `cooltool` after tweaking [`filebeat.yml`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/filebeat/filebeat.yml) as described above, create a `cooltool` directory under [`logstash/pipelines`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/logstash/pipelines) that follows the same pattern as the `zeek` parse pipeline. This directory will have an input file (tiny), a filter file (possibly large), and an output file (tiny). In the filter file, be sure to set the field [`event.hash`](https://www.elastic.co/guide/en/ecs/master/ecs-event.html#field-event-hash) to a unique value to identify indexed documents in OpenSearch; the [fingerprint filter](https://www.elastic.co/guide/en/logstash/current/plugins-filters-fingerprint.html) may be useful for this.
In order to add a new **parse pipeline** for `cooltool` after tweaking [`filebeat-logs.yml`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/filebeat/filebeat-logs.yml) as described above, create a `cooltool` directory under [`logstash/pipelines`]({{ site.github.repository_url }}/blob/{{ site.github.build_revision }}/logstash/pipelines) that follows the same pattern as the `zeek` parse pipeline. This directory will have an input file (tiny), a filter file (possibly large), and an output file (tiny). In the filter file, be sure to set the field [`event.hash`](https://www.elastic.co/guide/en/ecs/master/ecs-event.html#field-event-hash) to a unique value to identify indexed documents in OpenSearch; the [fingerprint filter](https://www.elastic.co/guide/en/logstash/current/plugins-filters-fingerprint.html) may be useful for this.

Finally, in the [`./config/logstash.env` file](malcolm-config.md#MalcolmConfigEnvVars), set a new `LOGSTASH_PARSE_PIPELINE_ADDRESSES` environment variable to `cooltool-parse,zeek-parse,suricata-parse,beats-parse` (assuming the pipeline address from the previous step was named `cooltool-parse`) so that logs sent from `filebeat` to `logstash` are forwarded to all parse pipelines.
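A minimal sketch of that logstash.env entry, assuming the parse pipeline address from the previous step is named cooltool-parse:

# ./config/logstash.env (sketch)
LOGSTASH_PARSE_PIPELINE_ADDRESSES=cooltool-parse,zeek-parse,suricata-parse,beats-parse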

1 change: 1 addition & 0 deletions docs/malcolm-config.md
@@ -101,6 +101,7 @@ Although the configuration script automates many of the following configuration
- See [**Tuning Suricata**](live-analysis.md#LiveAnalysisTuningSuricata) for other variables related to managing Suricata's performance and resource utilization.
* **`upload-common.env`** - settings for dealing with PCAP files [uploaded](upload.md#Upload) to Malcolm for analysis
- `AUTO_TAG` – if set to `true`, Malcolm will automatically create Arkime sessions and Zeek logs with tags based on the filename, as described in [Tagging](upload.md#Tagging) (default `true`)
- `EXTRA_TAGS` – a comma-separated list of default tags for data generated by Malcolm (default is an empty string)
- `PCAP_NODE_NAME` - specifies the node name to associate with network traffic metadata
* **`zeek.env`**, **`zeek-secret.env`**, **`zeek-live.env`** and **`zeek-offline.env`** - settings for [Zeek](https://www.zeek.org/index.html) and for scanning [extracted files](file-scanning.md#ZeekFileExtraction) Zeek observes in network traffic
- `EXTRACTED_FILE_CAPA_VERBOSE` – if set to `true`, all Capa rule hits will be logged; otherwise (`false`) only [MITRE ATT&CK® technique](https://attack.mitre.org/techniques) classifications will be logged
File renamed without changes.
2 changes: 1 addition & 1 deletion filebeat/scripts/clean-processed-folder.py
@@ -18,7 +18,7 @@
lockFilename = os.path.join(gettempdir(), '{}.lock'.format(os.path.basename(__file__)))
cleanLogSeconds = int(os.getenv('LOG_CLEANUP_MINUTES', "30")) * 60
cleanZipSeconds = int(os.getenv('ZIP_CLEANUP_MINUTES', "120")) * 60
fbRegFilename = os.getenv('FILEBEAT_REGISTRY_FILE', "/usr/share/filebeat/data/registry/filebeat/data.json")
fbRegFilename = os.getenv('FILEBEAT_REGISTRY_FILE', "/usr/share/filebeat-logs/data/registry/filebeat/data.json")

zeekDir = os.path.join(os.getenv('FILEBEAT_ZEEK_DIR', "/zeek/"), '')
zeekLiveDir = zeekDir + "live/logs/"
82 changes: 82 additions & 0 deletions filebeat/scripts/filebeat.sh
@@ -0,0 +1,82 @@
#!/bin/bash

# Copyright (c) 2024 Battelle Energy Alliance, LLC. All rights reserved.
PATH_HOME=
PATH_CONFIG=
PATH_DATA=
CONFIG_FILE=
MODULES=
VERBOSE_FLAG=
[[ -n "${EXTRA_TAGS}" ]] || EXTRA_TAGS=
SLEEP_SEC=0

while getopts vh:c:d:f:m:t:s: opts; do
case ${opts} in
v)
VERBOSE_FLAG="-v"
set -x
;;
h)
PATH_HOME="${OPTARG}"
;;
c)
PATH_CONFIG="${OPTARG}"
;;
d)
PATH_DATA="${OPTARG}"
;;
f)
CONFIG_FILE="${OPTARG}"
;;
m)
MODULES="${OPTARG}"
;;
t)
EXTRA_TAGS="${OPTARG}"
;;
s)
SLEEP_SEC="${OPTARG}"
;;
?)
echo "script usage: $(basename "$0") [-v (verbose)] -h <path.home> -c <path.config> -d <path.data> -c <config YML> [-m <module>] [-s <sleep sec.>]" >&2
exit 1
;;
esac
done

INSTANCE_UID="$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c 16; echo)"
(( ${#INSTANCE_UID} == 16 )) || INSTANCE_UID=$RANDOM

TMP_CONFIG_FILE="$(dirname "$(realpath "${CONFIG_FILE}")")/filebeat.$INSTANCE_UID.yml"

function cleanup {
rm -f "${TMP_CONFIG_FILE}"
}

trap cleanup EXIT

if [[ -n "${EXTRA_TAGS}" ]]; then
readarray -td '' EXTRA_TAGS_ARRAY < <(awk '{ gsub(/,/,"\0"); print; }' <<<"${EXTRA_TAGS},"); unset 'EXTRA_TAGS_ARRAY[-1]';
yq -P eval "(.\"filebeat.inputs\"[] | select(.type == \"log\").tags) += $(jo -a "${EXTRA_TAGS_ARRAY[@]}")" "${CONFIG_FILE}" > "${TMP_CONFIG_FILE}"
else
cp "${CONFIG_FILE}" "${TMP_CONFIG_FILE}"
fi
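To illustrate what the jo/yq combination above does (the tag values are hypothetical): jo builds a JSON array from the tag list, and yq appends that array to the tags of each "log"-type input in the temporary copy of the Filebeat config:

jo -a site-1 lab
# -> ["site-1","lab"]
# the yq expression then merges this array into the .tags list of every
# entry under "filebeat.inputs" whose type is "log" in ${TMP_CONFIG_FILE}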

MODULES_ARGS=()
if [[ -n "${MODULES}" ]]; then
readarray -td '' MODULES_ARRAY < <(awk '{ gsub(/,/,"\0"); print; }' <<<"${MODULES},")
unset 'MODULES_ARRAY[-1]'
MODULES_ARGS=('-modules' "${MODULES_ARRAY[@]}")
fi

sleep ${SLEEP_SEC}

/usr/local/bin/docker-entrypoint -e --strict.perms=false \
--path.home "${PATH_HOME}" \
--path.config "${PATH_CONFIG}" \
--path.data "${PATH_DATA}" \
-c "${TMP_CONFIG_FILE}" "${MODULES_ARGS[@]}"

popd >/dev/null 2>&1
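As a usage sketch, the wrapper could be invoked the same way supervisord does below, optionally passing extra tags with -t (tag values hypothetical):

/usr/local/bin/filebeat.sh \
  -h /usr/share/filebeat-logs \
  -c /usr/share/filebeat-logs \
  -d /usr/share/filebeat-logs/data \
  -f /usr/share/filebeat-logs/filebeat-logs.yml \
  -t "site-1,lab"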


30 changes: 17 additions & 13 deletions filebeat/supervisord.conf
@@ -18,26 +18,30 @@ supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface
serverurl=unix:///tmp/supervisor.sock

[program:filebeat]
command=/usr/local/bin/docker-entrypoint -e --strict.perms=false
command=/usr/local/bin/filebeat.sh
-h /usr/share/filebeat-logs
-c /usr/share/filebeat-logs
-d /usr/share/filebeat-logs/data
-f /usr/share/filebeat-logs/filebeat-logs.yml
user=%(ENV_PUSER)s
startsecs=0
startretries=0
autostart=true
autorestart=true
stopasgroup=true
killasgroup=true
directory=/usr/share/filebeat
directory=/usr/share/filebeat-logs
stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0
redirect_stderr=true

[program:filebeat-nginx]
command=bash -c "/usr/local/bin/opensearch_status.sh -t malcolm_beats_template && /usr/local/bin/docker-entrypoint -e --strict.perms=false \
--path.home /usr/share/filebeat-nginx \
--path.config /usr/share/filebeat-nginx \
--path.data /usr/share/filebeat-nginx/data \
-c /usr/share/filebeat-nginx/filebeat-nginx.yml \
--modules nginx"
command=bash -c "/usr/local/bin/opensearch_status.sh -t malcolm_beats_template && /usr/local/bin/filebeat.sh \
-h /usr/share/filebeat-nginx \
-c /usr/share/filebeat-nginx \
-d /usr/share/filebeat-nginx/data \
-f /usr/share/filebeat-nginx/filebeat-nginx.yml \
-m nginx"
user=%(ENV_PUSER)s
autostart=%(ENV_NGINX_LOG_ACCESS_AND_ERRORS)s
autorestart=%(ENV_NGINX_LOG_ACCESS_AND_ERRORS)s
@@ -51,11 +55,11 @@ stdout_logfile_maxbytes=0
redirect_stderr=true

[program:filebeat-tcp]
command=bash -c "/usr/local/bin/opensearch_status.sh -t malcolm_beats_template && /usr/local/bin/docker-entrypoint -e --strict.perms=false \
--path.home /usr/share/filebeat-tcp \
--path.config /usr/share/filebeat-tcp \
--path.data /usr/share/filebeat-tcp/data \
-c /usr/share/filebeat-tcp/filebeat-tcp.yml"
command=bash -c "/usr/local/bin/opensearch_status.sh -t malcolm_beats_template && /usr/local/bin/filebeat.sh \
-h /usr/share/filebeat-tcp \
-c /usr/share/filebeat-tcp \
-d /usr/share/filebeat-tcp/data \
-f /usr/share/filebeat-tcp/filebeat-tcp.yml"
user=%(ENV_PUSER)s
autostart=%(ENV_FILEBEAT_TCP_LISTEN)s
autorestart=%(ENV_FILEBEAT_TCP_LISTEN)s
2 changes: 1 addition & 1 deletion scripts/build.sh
@@ -108,7 +108,7 @@ fi

# we're going to do some validation that some things got pulled/built correctly
FILES_IN_IMAGES=(
"/usr/share/filebeat/filebeat.yml;filebeat-oss"
"/usr/share/filebeat-logs/filebeat-logs.yml;filebeat-oss"
"/var/www/upload/filepond/dist/filepond.js;file-upload"
"/opt/freq_server/freq_server.py;freq"
"/usr/local/bin/capa;file-monitor"