runpod · deanq · Apr 17, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py
@@ -433,6 +433,13 @@ def config_hash(self) -> str:
         # networkVolumeId is derived from volume deployment, not user config
         config_dict.pop("networkVolumeId", None)
 
+        # instanceIds=[] (which the RunPod API may return after deployment) and
+        # instanceIds=None (the local default on GPU endpoints) both mean "no
+        # instance restriction". Drop the key whenever it is falsy so the two
+        # forms hash identically instead of registering as false drift.
+        if not config_dict.get("instanceIds"):
+            config_dict.pop("instanceIds", None)
+
         # Convert to JSON string for hashing
         config_str = json.dumps(config_dict, sort_keys=True)
         hash_obj = hashlib.md5(f"{resource_type}:{config_str}".encode())
@@ -615,11 +622,10 @@ def _validate_cpu_disk_size(self) -> None:
 
     def _create_new_template(self) -> PodTemplate:
         """Create a new PodTemplate with standard configuration."""
-        return PodTemplate(
-            name=self.resource_id,
-            imageName=self.imageName,
-            env=KeyValuePair.from_dict(self.env or {}),
-        )
+        kwargs: dict = {"name": self.resource_id, "imageName": self.imageName}
+        if self.env is not None:
+            kwargs["env"] = KeyValuePair.from_dict(self.env)
+        return PodTemplate(**kwargs)
 
     def _configure_existing_template(self) -> None:
         """Configure an existing template with necessary overrides."""

diff --git a/src/runpod_flash/core/resources/serverless_cpu.py b/src/runpod_flash/core/resources/serverless_cpu.py
@@ -161,11 +161,10 @@ def config_hash(self) -> str:
 
     def _create_new_template(self) -> PodTemplate:
         """Create a new PodTemplate with CPU-appropriate disk sizing."""
-        template = PodTemplate(
-            name=self.resource_id,
-            imageName=self.imageName,
-            env=KeyValuePair.from_dict(self.env or {}),
-        )
+        kwargs: dict = {"name": self.resource_id, "imageName": self.imageName}
+        if self.env is not None:
+            kwargs["env"] = KeyValuePair.from_dict(self.env)
+        template = PodTemplate(**kwargs)
         # Apply CPU-specific disk sizing
         self._apply_cpu_disk_sizing(template)
         return template

diff --git a/tests/unit/resources/test_serverless.py b/tests/unit/resources/test_serverless.py
@@ -3389,3 +3389,96 @@ async def test_runsync_default_timeout_when_none(self):
         mock_rp_client.post.assert_called_once_with(
             "ep-none/runsync", {"input": "data"}, timeout=60
         )
+
+
+class TestInstanceIdsFalseDrift:
+    """Regression tests for instanceIds=[] vs None causing false drift on GPU endpoints.
+
+    The RunPod API may return instanceIds=[] for GPU endpoints that were deployed
+    without any instanceIds restriction. Locally the user never sets instanceIds,
+    so it stays None. Without normalization, exclude_none=True removes None but
+    keeps [] — producing a different hash and triggering a spurious update (new
+    release / cold start) on every subsequent run.
+    """
+
+    def test_instance_ids_none_and_empty_list_hash_equal(self):
+        """GPU endpoint: instanceIds=None and instanceIds=[] produce the same hash."""
+        s_none = ServerlessResource(name="test", instanceIds=None)
+        s_empty = ServerlessResource(name="test", instanceIds=[])
+
+        assert s_none.config_hash == s_empty.config_hash
+
+    def test_instance_ids_none_and_absent_hash_equal(self):
+        """GPU endpoint: omitting instanceIds hashes the same as instanceIds=None."""
+        s_absent = ServerlessResource(name="test")
+        s_none = ServerlessResource(name="test", instanceIds=None)
+
+        assert s_absent.config_hash == s_none.config_hash
+
+    def test_instance_ids_non_empty_still_detected_as_drift(self):
+        """Non-empty instanceIds must still change the hash so real drift is caught."""
+        s_no_restriction = ServerlessResource(name="test")
+        s_restricted = ServerlessResource(
+            name="test", instanceIds=[CpuInstanceType.CPU3C_2_4]
+        )
+
+        assert s_no_restriction.config_hash != s_restricted.config_hash
+
+
+class TestCreateNewTemplateEnvFieldSet:
+    """Regression tests for _create_new_template() spuriously marking 'env' as set.
+
+    When env=None (default), the old code passed env=[] explicitly to PodTemplate,
+    which put 'env' into Pydantic's model_fields_set. The update() logic then saw
+    has_explicit_template_env=True and set env_needs_update=True, causing
+    _inject_runtime_template_vars() to run and RUNPOD_API_KEY to oscillate between
+    being added and removed on every run.
+    """
+
+    def test_create_new_template_env_not_in_fields_set_when_env_none(self):
+        """When self.env is None, 'env' must NOT appear in template.model_fields_set."""
+        resource = ServerlessEndpoint(name="test", imageName="test:latest")
+        assert resource.env is None
+
+        template = resource._create_new_template()
+
+        assert "env" not in template.model_fields_set
+
+    def test_create_new_template_env_in_fields_set_when_env_empty_dict(self):
+        """When self.env is explicitly {}, 'env' MUST appear in template.model_fields_set.
+
+        env={} is an intentional explicit override (clear all env vars), distinct from
+        env=None (default, no opinion). Using 'is not None' preserves this distinction.
+        """
+        resource = ServerlessEndpoint(
+            name="test",
+            imageName="test:latest",
+            env={},
+        )
+        assert resource.env == {}
+
+        template = resource._create_new_template()
+
+        assert "env" in template.model_fields_set
+
+    def test_create_new_template_env_in_fields_set_when_env_set(self):
+        """When self.env is populated, 'env' MUST appear in template.model_fields_set."""
+        resource = ServerlessEndpoint(
+            name="test",
+            imageName="test:latest",
+            env={"MY_VAR": "value"},
+        )
+
+        template = resource._create_new_template()
+
+        assert "env" in template.model_fields_set
+        assert any(kv.key == "MY_VAR" for kv in template.env)
+
+    def test_create_new_template_env_not_in_fields_set_cpu_endpoint(self):
+        """CpuServerlessEndpoint: same fix applies — env=None must not mark 'env' as set."""
+        resource = CpuServerlessEndpoint(name="test", imageName="test:latest")
+        assert resource.env is None
+
+        template = resource._create_new_template()
+
+        assert "env" not in template.model_fields_set