From 09100690b4114aaad5a022b340ef4c59ab9f62c1 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Mon, 27 Oct 2025 11:07:46 +0100 Subject: [PATCH] Staging (#2733) --- changelog.md | 2 +- lib/cuckoo/common/abstracts.py | 46 +++++++++++++++++- lib/cuckoo/core/scheduler.py | 28 ++++++++++- lib/cuckoo/core/startup.py | 89 ++++++++++++++++++++++++++++++++++ requirements.txt | 6 +-- tests/data | 2 +- 6 files changed, 164 insertions(+), 9 deletions(-) diff --git a/changelog.md b/changelog.md index f85cdda4632..df51c60b18a 100644 --- a/changelog.md +++ b/changelog.md @@ -67,7 +67,7 @@ * Monitor update: WMI hooks: add handling for VT_NULL and enable WMI_Get logging ### [06.06.2025] -* Monitor updates: +* Monitor updates: * WMI hooks * Fix format string vulnerability in debugger StringsOutput() function diff --git a/lib/cuckoo/common/abstracts.py b/lib/cuckoo/common/abstracts.py index e7ff18774cd..5d8547f4271 100644 --- a/lib/cuckoo/common/abstracts.py +++ b/lib/cuckoo/common/abstracts.py @@ -40,7 +40,7 @@ from lib.cuckoo.common.path_utils import path_exists, path_mkdir from lib.cuckoo.common.url_validate import url as url_validator from lib.cuckoo.common.utils import create_folder, get_memdump_path, load_categories -from lib.cuckoo.core.database import Database, Machine, _Database +from lib.cuckoo.core.database import Database, Machine, _Database, Task try: import re2 as re @@ -270,6 +270,48 @@ def find_machine_to_service_task(self, task): """ return self.db.find_machine_to_service_task(task) + def _machine_can_service_task(self, machine: Machine, task: Task) -> bool: + """Check if a machine can service a task based on platform, arch, and tags.""" + # 1. Platform check + if task.platform and machine.platform != task.platform: + return False + + task_tags = {tag.name for tag in task.tags} + machine_tags = {tag.name for tag in machine.tags} + + # Define architecture tags. + arch_tags = {"x86", "x64"} # Add other relevant archs if needed + task_arch = next((tag for tag in task_tags if tag in arch_tags), None) + + # 2. Architecture compatibility check + if task_arch: + if machine.platform == "windows": + # 32-bit Windows can't run 64-bit tasks. + if machine.arch == "x86" and task_arch == "x64": + return False + else: # Strict matching for Linux/other platforms + # The machine's arch must equal the task's arch. + if machine.arch != task_arch: + return False + + # 3. Check remaining tags + # All tags that are NOT architecture tags must be present on the machine. + other_tags = task_tags - arch_tags + if not other_tags.issubset(machine_tags): + return False + + # For strict platforms (not Windows), the machine must explicitly have the arch tag. + if task_arch and machine.platform != "windows": + if task_arch not in machine_tags: + return False + + # For a Windows machine to run an x64 task, it must have the x64 tag. + if task_arch == "x64" and machine.platform == "windows": + if "x64" not in machine_tags: + return False + + return True + def scale_pool(self, machine: Machine) -> None: """This can be overridden in sub-classes to scale the pool of machines once one has been acquired.""" return @@ -450,7 +492,7 @@ def start(self, label): try: self.vms[label].revertToSnapshot(snapshot, flags=0) except libvirt.libvirtError as e: - raise CuckooMachineError(f"Unable to restore snapshot on virtual machine {label}") from e + raise CuckooMachineError(f"Unable to restore snapshot on virtual machine {label}. Your snapshot MUST BE in running state!") from e finally: self._disconnect(conn) else: diff --git a/lib/cuckoo/core/scheduler.py b/lib/cuckoo/core/scheduler.py index aa00a7f5d64..518b77380fd 100644 --- a/lib/cuckoo/core/scheduler.py +++ b/lib/cuckoo/core/scheduler.py @@ -177,6 +177,9 @@ def find_pending_task_to_service(self) -> Tuple[Optional[Task], Optional[Machine task: Optional[Task] = None machine: Optional[Machine] = None + # Cache available machine stats to avoid repeated DB queries within the loop. + available_tags_stats = self.get_available_machine_stats() + # Get the list of all pending tasks in the order that they should be processed. for task_candidate in self.db.list_tasks( status=TASK_PENDING, @@ -192,11 +195,32 @@ def find_pending_task_to_service(self) -> Tuple[Optional[Task], Optional[Machine try: machine = self.machinery_manager.find_machine_to_service_task(task_candidate) except CuckooUnserviceableTaskError: + requested_tags = ", ".join(tag.name for tag in task_candidate.tags) + log_message = ( + "Task #{task_id}: {status} unserviceable task because no matching machine could be found. " + "Requested tags: '{tags}'. Available machine tags: {available}. " + "Please check your machinery configuration." + ) + if self.cfg.cuckoo.fail_unserviceable: - log.info("Task #%s: Failing unserviceable task", task_candidate.id) + log.info( + log_message.format( + task_id=task_candidate.id, + status="Failing", + tags=requested_tags, + available=available_tags_stats, + ) + ) self.db.set_status(task_candidate.id, TASK_FAILED_ANALYSIS) else: - log.info("Task #%s: Unserviceable task", task_candidate.id) + log.info( + log_message.format( + task_id=task_candidate.id, + status="Unserviceable", + tags=requested_tags, + available=available_tags_stats, + ) + ) continue if machine: diff --git a/lib/cuckoo/core/startup.py b/lib/cuckoo/core/startup.py index 71d29016fe1..cdac1e19496 100644 --- a/lib/cuckoo/core/startup.py +++ b/lib/cuckoo/core/startup.py @@ -305,6 +305,7 @@ def init_modules(): # Import machine manager. import_plugin(f"modules.machinery.{cuckoo.cuckoo.machinery}") + check_snapshot_state() for category, entries in list_plugins().items(): log.debug('Imported "%s" modules:', category) @@ -316,6 +317,94 @@ def init_modules(): log.debug("\t |-- %s", entry.__name__) +def check_snapshot_state(): + """Checks the state of snapshots and machine architecture for KVM/QEMU machinery.""" + if cuckoo.cuckoo.machinery not in ("kvm", "qemu"): + return + + try: + import libvirt + from xml.etree import ElementTree + except ImportError: + raise CuckooStartupError( + "The 'libvirt-python' library is required for KVM/QEMU machinery but is not installed. " + "Please install it (e.g., 'cd /opt/CAPEv2/ ; sudo -u cape /etc/poetry/bin/poetry run extra/libvirt_installer.sh')." + ) + + machinery_config = Config(cuckoo.cuckoo.machinery) + dsn = machinery_config.get(cuckoo.cuckoo.machinery).get("dsn") + conn = None + + try: + conn = libvirt.open(dsn) + except libvirt.libvirtError as e: + raise CuckooStartupError(f"Failed to connect to libvirt with DSN '{dsn}'. Error: {e}") + + if conn is None: + raise CuckooStartupError(f"Failed to connect to libvirt with DSN '{dsn}'. Please check your configuration and libvirt service.") + + try: + for machine_name in machinery_config.get(cuckoo.cuckoo.machinery).machines.split(","): + machine_name = machine_name.strip() + if not machine_name: + continue + + try: + domain = conn.lookupByName(machine_name) + machine_config = machinery_config.get(machine_name) + + # Check for valid architecture configuration. + arch = machine_config.get("arch") + if not arch: + raise CuckooStartupError(f"Missing 'arch' configuration for VM '{machine_name}'. Please specify a valid architecture (e.g., x86, x64).") + + if arch == "x86_64": + raise CuckooStartupError( + f"Invalid architecture '{arch}' for VM '{machine_name}'. Please use 'x64' instead of 'x86_64'." + ) + + if arch != arch.lower(): + raise CuckooStartupError( + f"Invalid architecture '{arch}' for VM '{machine_name}'. Architecture must be all lowercase." + ) + + # Check snapshot state. + snapshot_name = machine_config.get("snapshot") + snapshot = None + + if snapshot_name: + snapshot = domain.snapshotLookupByName(snapshot_name) + else: + if domain.hasCurrentSnapshot(0): + snapshot = domain.snapshotCurrent(0) + snapshot_name = snapshot.getName() + log.info("No snapshot name configured for VM '%s', checking latest: '%s'", machine_name, snapshot_name) + else: + log.warning("No snapshot configured or found for VM '%s'. Skipping check.", machine_name) + continue + + xml_desc = snapshot.getXMLDesc(0) + root = ElementTree.fromstring(xml_desc) + state_element = root.find("state") + + if state_element is None or state_element.text != "running": + state = state_element.text if state_element is not None else "unknown" + raise CuckooStartupError( + f"Snapshot '{snapshot_name}' for VM '{machine_name}' is not in a 'running' state (current state: '{state}'). " + "Please ensure you take snapshots of running VMs." + ) + + except libvirt.libvirtError as e: + # It's possible a snapshot name is provided but doesn't exist, which is a config error. + snapshot_identifier = f"with snapshot '{snapshot_name}'" if snapshot_name else "" + raise CuckooStartupError( + f"Error checking snapshot state for VM '{machine_name}' {snapshot_identifier}. Libvirt error: {e}" + ) + finally: + if conn: + conn.close() + + def init_rooter(): """If required, check whether the rooter is running and whether we can connect to it.""" diff --git a/requirements.txt b/requirements.txt index 3ef05b6c3ee..a5b92ba34d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -523,9 +523,9 @@ greenlet==3.0.3 ; python_version >= "3.10" and python_version < "4.0" \ gunicorn==23.0.0 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d \ --hash=sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec -h11==0.16.0 ; python_version >= "3.10" and python_version < "4.0" \ - --hash=sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1 \ - --hash=sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86 +h11==0.14.0 ; python_version >= "3.10" and python_version < "4.0" \ + --hash=sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d \ + --hash=sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761 httptools==0.6.4 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a \ --hash=sha256:0e563e54979e97b6d13f1bbc05a96109923e76b901f786a5eae36e99c01237bd \ diff --git a/tests/data b/tests/data index b394a00ba36..7c38432d1cc 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit b394a00ba3649afc69e7f22ad845a3a8a659200e +Subproject commit 7c38432d1cceb6231766227ef45b1321315d3aaf