conf/default/reporting.conf.default (4 changes: 4 additions & 0 deletions)
@@ -231,6 +231,10 @@ exclude_dirs = logs, shots
# Good examples are large report formats you don't need in GCS.
exclude_files =

# Mode: zip - will submit all files and folders as a single zip archive. Useful to avoid spamming Pub/Sub notifications on file creation.
# Mode: file - will submit files one by one.
mode = zip

# Can be vm or json
auth_by = vm
# only if auth_by = json. The absolute path to your Google Cloud service account JSON key file.
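
For reference, a complete section using the new option might look like the sketch below, assuming the section is named [gcs] after the module; enabled and the bucket name are illustrative placeholders, while the remaining keys appear in the diff above.

[gcs]
enabled = yes
bucket_name = my-cape-reports
exclude_dirs = logs, shots
exclude_files =
mode = zip
auth_by = vm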
modules/reporting/gcs.py (90 changes: 59 additions & 31 deletions)
@@ -1,5 +1,7 @@
import os
import logging
import tempfile
import zipfile
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooReportError
@@ -40,7 +42,6 @@ def run(self, results):
)
return

# Read configuration options from gcs.conf
# Read configuration options from gcs.conf and validate them
bucket_name = self.options.get("bucket_name")
if not bucket_name:
@@ -66,8 +67,7 @@
exclude_dirs_str = self.options.get("exclude_dirs", "")
exclude_files_str = self.options.get("exclude_files", "")

# --- NEW: Parse the exclusion strings into sets for efficient lookups ---
# The `if item.strip()` ensures we don't have empty strings from trailing commas
# Parse the exclusion strings into sets for efficient lookups
exclude_dirs = {item.strip() for item in exclude_dirs_str.split(",") if item.strip()}
exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()}

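A quick illustration of the parsing above, with hypothetical file names: whitespace is trimmed, and the empty string left by a trailing comma is filtered out by the if item.strip() guard.

exclude_files_str = "report.json, report.html, "
exclude_files = {item.strip() for item in exclude_files_str.split(",") if item.strip()}
assert exclude_files == {"report.json", "report.html"}
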
@@ -76,6 +76,9 @@
if exclude_files:
log.debug("GCS reporting will exclude files: %s", exclude_files)

# Get the upload mode, defaulting to 'file' for backward compatibility
mode = self.options.get("mode", "file")

try:
# --- Authentication ---
log.debug("Authenticating with Google Cloud Storage...")
@@ -87,39 +90,64 @@
"The specified GCS bucket '%s' does not exist or you don't have permission to access it.", bucket_name
)

# --- File Upload ---
# Use the analysis ID as a "folder" in the bucket
analysis_id = results.get("info", {}).get("id")
if not analysis_id:
raise CuckooReportError("Could not get analysis ID from results.")

log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket_name)

# self.analysis_path is the path to the analysis results directory
# e.g., /opt/cape/storage/analyses/123/
source_directory = self.analysis_path

for root, dirs, files in os.walk(source_directory):
# We modify 'dirs' in-place to prevent os.walk from descending into them.
# This is the most efficient way to skip entire directory trees.
dirs[:] = [d for d in dirs if d not in exclude_dirs]

for filename in files:
# --- NEW: File Exclusion Logic ---
if filename in exclude_files:
log.debug("Skipping excluded file: %s", os.path.join(root, filename))
continue # Skip to the next file

local_path = os.path.join(root, filename)
relative_path = os.path.relpath(local_path, source_directory)
blob_name = f"{analysis_id}/{relative_path}"

log.debug("Uploading '%s' to '%s'", local_path, blob_name)

blob = bucket.blob(blob_name)
blob.upload_from_filename(local_path)

log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)
if mode == "zip":
self.upload_zip_archive(bucket, analysis_id, source_directory, exclude_dirs, exclude_files)
elif mode == "file":
self.upload_files_individually(bucket, analysis_id, source_directory, exclude_dirs, exclude_files)
else:
raise CuckooReportError(f"Invalid GCS upload mode specified: {mode}. Must be 'file' or 'zip'.")

except Exception as e:
raise CuckooReportError("Failed to upload report to GCS: %s", str(e))
raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e

def _iter_files_to_upload(self, source_directory, exclude_dirs, exclude_files):
"""Generator that yields files to be uploaded, skipping excluded ones."""
for root, dirs, files in os.walk(source_directory):
# Exclude specified directories
dirs[:] = [d for d in dirs if d not in exclude_dirs]
for filename in files:
# Exclude specified files
if filename in exclude_files:
log.debug("Skipping excluded file: %s", os.path.join(root, filename))
continue

local_path = os.path.join(root, filename)
relative_path = os.path.relpath(local_path, source_directory)
yield local_path, relative_path

def upload_zip_archive(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
"""Compresses and uploads the analysis directory as a single zip file."""
log.debug("Compressing and uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)
blob_name = f"{analysis_id}.zip"

with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_zip_file:
tmp_zip_file_name = tmp_zip_file.name
with zipfile.ZipFile(tmp_zip_file, "w", zipfile.ZIP_DEFLATED) as archive:
for local_path, relative_path in self._iter_files_to_upload(source_directory, exclude_dirs, exclude_files):
archive.write(local_path, relative_path)

try:
log.debug("Uploading '%s' to '%s'", tmp_zip_file_name, blob_name)
blob = bucket.blob(blob_name)
blob.upload_from_filename(tmp_zip_file_name)
finally:
os.unlink(tmp_zip_file_name)
log.info("Successfully uploaded archive for analysis %d to GCS.", analysis_id)

def upload_files_individually(self, bucket, analysis_id, source_directory, exclude_dirs, exclude_files):
"""Uploads analysis files individually to the GCS bucket."""
log.debug("Uploading files for analysis ID %d to GCS bucket '%s'", analysis_id, bucket.name)
for local_path, relative_path in self._iter_files_to_upload(source_directory, exclude_dirs, exclude_files):
blob_name = f"{analysis_id}/{relative_path}"
log.debug("Uploading '%s' to '%s'", local_path, blob_name)
blob = bucket.blob(blob_name)
blob.upload_from_filename(local_path)

log.info("Successfully uploaded files for analysis %d to GCS.", analysis_id)
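
For completeness, a consumer-side sketch of what zip mode produces: a single <analysis_id>.zip blob at the bucket root. This fetches and unpacks it with the same google-cloud-storage client library the module uses; the bucket name and analysis ID are placeholders.

import zipfile

from google.cloud import storage

client = storage.Client()  # authenticates via application default credentials
bucket = client.bucket("my-cape-reports")  # placeholder bucket name
analysis_id = 123  # placeholder analysis ID

# Zip mode stores the whole analysis as one blob named "<id>.zip".
blob = bucket.blob(f"{analysis_id}.zip")
blob.download_to_filename(f"{analysis_id}.zip")

# Unpack to recover the original analysis directory layout.
with zipfile.ZipFile(f"{analysis_id}.zip") as archive:
    archive.extractall(str(analysis_id))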