Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions conf/default/distributed.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,8 @@ fstab_socket = /tmp/cape-fstab

# Google Cloud Platform
[GCP]
# Note: Further GCP configuration (project_id, zones, autodiscovery, etc.)
# MUST be done in conf/gcp.conf. This section only controls the enabled state.
enabled = no
# Comma separated list of zones
zones = ""
project_id = ""
# Use the REST API instead of the GCP Python client
# https://cloud.google.com/docs/authentication/rest
# gcloud auth print-access-token
token = ""
# Seconds between attempts to discover new instances
autodiscovery = 600
# Instances should start with following name pattern
instance_name = cape-server


110 changes: 46 additions & 64 deletions conf/default/gcp.conf.default
Original file line number Diff line number Diff line change
@@ -1,66 +1,48 @@
[gcp]
# Specify the Google Cloud Zone (for example, europe-north2-a). This is case-sensitive
zone = <zone_name>

# Global Environment Settings
# Specify the project identifier
project = <project_id>

# pubsub
subscription_id =

# Running in a GCP environment. If true, the Compute Engine credentials will be used
running_in_gcp = true

# Specify the path to the service account key file. If not specified, the default service account will be used
service_account_path =

# Specify a comma-separated list of available machines to be used.
# Each machine will be represented by the instance-name (for example, cape-server-windows).
# For each specified instance-name you have to define a dedicated section containing the details
# on the respective machine. (E.g. cape-server-windows,cape-server-linux)
# For better performance, it is recommended to leave this empty and set autoscale = yes.
machines =

[cape-server-linux]
# Specify the label name.
# Label would be the instance-name of the current machine as specified in your GCP account.
label = cape-server-linux

# Specify the operating system platform used by current machine
# [windows/darwin/linux].
platform = linux

# Set the machine architecture
# x64 or x86
arch = x64

# Specify the IP address of the current virtual machine. Make sure that the
# IP address is valid and that the host machine is able to reach it. If not,
# the analysis will fail.
# ip =

# (Optional) Specify the name of the network interface that should be used
# when dumping network traffic from this machine with tcpdump. If specified,
# overrides the default interface specified above.
# Example (eth0 is the interface name):
# interface =

# (Optional) Specify the IP of the Result Server, as your virtual machine sees it.
# The Result Server will always bind to the address and port specified in cuckoo.conf,
# however you could set up your virtual network to use NAT/PAT, so you can specify here
# the IP address for the Result Server as your machine sees it. If you don't specify an
# address here, the machine will use the default value from cuckoo.conf.
# NOTE: if you set this option you have to set result server IP to 0.0.0.0 in cuckoo.conf.
# Example:
# resultserver_ip =

# (Optional) Specify the port for the Result Server, as your virtual machine sees it.
# The Result Server will always bind to the address and port specified in cuckoo.conf,
# however you could set up your virtual network to use NAT/PAT, so you can specify here
# the port for the Result Server as your machine sees it. If you don't specify a port
# here, the machine will use the default value from cuckoo.conf.
# resultserver_port =

# (Optional) Set your own tags. These are comma-separated and help to identify
# specific VMs. You can run samples on VMs that have the tags you require.
# tags =
project =
# Specify the Google Cloud Zone (for example, europe-north2-a). This is case-sensitive
zone =
# Authentication method: vm (instance credentials), json (key file), or token (manual token)
auth_by = vm
# Path to the service account key file (required if auth_by = json)
# service_account_path = data/gcp-credentials.json
# Bearer token for REST API usage (optional)
# token =

[samples_pubsub]
# GCP Pub/Sub Sample Processing Service
enabled = no
# Pub/Sub subscription name
subscription_id =
# GCS bucket for sample downloads
samples_bucket = sandbox-samples-unique
# Concurrent message limit
max_messages = 5
# Lease duration in seconds
lease_duration = 1800

[distributed]
# Worker Node Autodiscovery
enabled = no
# Seconds between attempts to discover new instances
autodiscovery_interval = 600
# Instances should start with following name prefix
instance_name_pattern = cape-server
# Comma separated list of zones to scan (defaults to global zone if empty)
zones =

[reporting]
# Analysis Results Upload to GCS
enabled = no
# The name of your GCS bucket where reports will be uploaded
results_bucket =
# Upload mode: zip (single archive per task) or file (individual files)
mode = zip
# Delete local report after successful upload to GCS
delete_after_upload = no
# Comma-separated list of DIRECTORY names to exclude (e.g., logs, shots)
exclude_dirs = logs, shots
# Comma-separated list of exact FILENAMES to exclude
exclude_files =
24 changes: 2 additions & 22 deletions conf/default/reporting.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -226,27 +226,7 @@ enabled = no

# Google Cloud Storage
[gcs]
# Note: Further GCS configuration (bucket name, credentials, mode, etc.)
# MUST be done in conf/gcp.conf. This section only controls the enabled state.
enabled = no
# The name of your Google Cloud Storage bucket where files will be uploaded.
bucket_name = your-gcs-bucket-name

# Comma-separated list of DIRECTORY names to exclude from the upload.
# Good examples are 'shots' (contains all screenshots) or 'memory' (for full memory dumps).
exclude_dirs = logs, shots

# Comma-separated list of exact FILENAMES to exclude from the upload.
# Good examples are large report formats you don't need in GCS.
exclude_files =

# Mode: zip - uploads all files and folders as a single zip archive. Useful to avoid spamming Pub/Sub notifications on file creation.
# Mode: file - uploads files one by one.
mode = zip

# Can be vm or json
auth_by = vm
# only if auth_by = json. The absolute path to your Google Cloud service account JSON key file.
# This file is required for authentication.
credentials_path = data/gcp-credentials.json

# Delete local report after successful upload to GCS
delete_after_upload = no
41 changes: 38 additions & 3 deletions lib/cuckoo/common/demux.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,35 @@ def is_valid_package(package: str) -> bool:
return any(ptype in package for ptype in VALID_PACKAGES)


# Filename suffixes that mark a file as junk to be skipped.
# Entries are lowercase bytes so they can be compared against a lowercased bytes filename.
JUNK_EXTENSIONS = {
    b".gitattributes",
    b".gitignore",
    b".gitmodules",
    b".md",
    b".txt",
    b".yaml",
    b".yar",
    b".yara",
    b".yml",
}

# Lowercase bytes name fragments that also mark a file as junk.
JUNK_NAMES = {
    b"authors",
    b"copying",
    b"license",
    b"makefile",
    b"readme",
}


# ToDo fix return type
def _sf_children(child: sfFile): # -> bytes:
path_to_extract = ""
filename_lower = child.filename.lower()

# Skip junk files
if any(filename_lower.endswith(ext) for ext in JUNK_EXTENSIONS):
return (b"", child.platform, child.magic, child.filesize)
if any(name in filename_lower for name in JUNK_NAMES):
return (b"", child.platform, child.magic, child.filesize)
if b".github/" in filename_lower or b".git/" in filename_lower:
return (b"", child.platform, child.magic, child.filesize)

_, ext = os.path.splitext(child.filename)
ext = ext.lower()
if (
Expand All @@ -191,7 +217,7 @@ def _sf_children(child: sfFile): # -> bytes:
_ = path_write_file(path_to_extract, child.contents)
except Exception as e:
log.exception(e)
return (path_to_extract.encode(), child.platform, child.magic, child.filesize)
return (path_to_extract.encode(), child.platform, child.magic or "", child.filesize)


# ToDo fix typing need to add str as error msg
Expand All @@ -211,7 +237,7 @@ def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True):

if unpacked.package in whitelist_extensions:
file = File(filename)
magic_type = file.get_type()
magic_type = file.get_type() or ""
platform = file.get_platform()
file_size = file.get_size()
return [[filename, platform, magic_type, file_size]], ""
Expand Down Expand Up @@ -246,6 +272,15 @@ def demux_sample(filename: bytes, package: str, options: str, use_sflock: bool =
If file is a ZIP, extract its included files and return their file paths
If file is an email, extracts its attachments and return their file paths (later we'll also extract URLs)
"""
# Skip junk files
filename_bytes = filename if isinstance(filename, bytes) else filename.encode()
filename_lower_bytes = filename_bytes.lower()
if any(filename_lower_bytes.endswith(ext) for ext in JUNK_EXTENSIONS) or any(
name in filename_lower_bytes for name in JUNK_NAMES
):
filename_str = filename.decode(errors="ignore") if isinstance(filename, bytes) else filename
return [], [{"junk_filter": f"File {filename_str} skipped by junk filter"}]

# sflock requires filename to be bytes object for Py3
# TODO: Remove after checking all uses of demux_sample use bytes ~TheMythologist
if isinstance(filename, str) and use_sflock:
Expand Down Expand Up @@ -281,7 +316,7 @@ def demux_sample(filename: bytes, package: str, options: str, use_sflock: bool =
filename = tmp_path

# don't try to extract from office docs
magic = File(filename).get_type()
magic = File(filename).get_type() or ""
# if file is an Office doc and password is supplied, try to decrypt the doc
if "Microsoft" in magic:
pass
Expand Down
Loading
Loading