Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/xnnpack/operators/node_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def get_serialized_buffer_index(
f"Serializing constant data node {tensor} but tensor value has no bytes",
)
sha256_hash = hashlib.sha256(bytes(array))
named_key = tensor.name + "_" + sha256_hash.hexdigest()
named_key = sha256_hash.hexdigest()
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing named_key to only the SHA256 digest makes keys content-addressed and no longer namespaced per tensor. When exporting multiple PTD/PTE named data maps that are loaded together (e.g., foundation + LoRA program-data separation), MergedDataMap::load rejects duplicate keys across maps even if the underlying bytes are identical, so identical constants (common for zero/one-initialized tensors) can now cause runtime load failures. Consider incorporating a stable per-parameter identifier into the key (e.g., get_attr_node.target/FQN) or otherwise namespacing by component while keeping determinism across exports.

Suggested change
named_key = sha256_hash.hexdigest()
# Use a per-tensor namespace in the key to avoid collisions across
# different PTD/PTE maps that may contain identical constant bytes.
tensor_name = getattr(tensor, "name", None)
if tensor_name is not None:
named_key = f"{tensor_name}:{sha256_hash.hexdigest()}"
else:
named_key = sha256_hash.hexdigest()

Copilot uses AI. Check for mistakes.

size = const_val.untyped_storage().nbytes()
xnn_graph.constant_data.append(
Expand Down
29 changes: 19 additions & 10 deletions examples/models/llama/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,31 @@ def __init__(
self.rank = rank
self.alpha = alpha
self.use_bias = use_bias
self.dropout = dropout

linear = nn.Linear(in_dim, out_dim, bias=use_bias)
weight = linear.weight
bias = linear.bias if self.use_bias else None
self.register_parameter("weight", nn.Parameter(weight))
self.register_parameter(
"bias", nn.Parameter(bias) if bias is not None else None
)

self.linear = nn.Linear(in_dim, out_dim, bias=use_bias)
self.dropout = nn.Dropout(p=dropout) if dropout > 0.0 else nn.Identity()
self.lora_a = nn.Linear(in_features=in_dim, out_features=rank, bias=False)
self.lora_b = nn.Linear(in_features=rank, out_features=out_dim, bias=False)

@property
def weight(self):
    """Weight tensor of the wrapped base linear projection.

    Kept as a read-only property so existing callers that accessed
    ``weight`` directly on this module keep working after the parameter
    moved onto the ``linear`` submodule.
    """
    base = self.linear
    return base.weight

@property
def bias(self):
    """Bias tensor of the wrapped base linear projection (or None).

    Mirrors the ``weight`` property: preserves the old attribute surface
    now that the parameter lives on the ``linear`` submodule.
    """
    base = self.linear
    return base.bias

def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
    """Load parameters, remapping legacy checkpoint keys.

    Older checkpoints stored the base projection parameters as
    ``<prefix>weight`` / ``<prefix>bias``; the module now holds them on a
    submodule, so the current keys are ``<prefix>linear.weight`` /
    ``<prefix>linear.bias``. Rename any legacy key in place — without
    clobbering an already-present new-style key — before delegating to
    the default loader.
    """
    for param_name in ("weight", "bias"):
        legacy_key = prefix + param_name
        current_key = prefix + "linear." + param_name
        if legacy_key in state_dict and current_key not in state_dict:
            state_dict[current_key] = state_dict.pop(legacy_key)
    super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)
Comment on lines +43 to +50
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LoRALinear now changes its state_dict surface from weight/bias to linear.weight/linear.bias and adds a compatibility remap in _load_from_state_dict. This is a behavior that’s easy to regress (especially for loading older checkpoints through the full model), but there’s no test covering the backward-compat load path. Please add a unit test that constructs an old-format state_dict (with ...weight/...bias) and verifies it loads into the updated module and produces identical outputs/parameters.

Copilot uses AI. Check for mistakes.

def forward(self, x: torch.Tensor) -> torch.Tensor:
out = torch.nn.functional.linear(x, self.weight, self.bias)
out = self.linear(x)
lora_out = self.lora_a(self.dropout(x))
lora_out = (self.alpha / self.rank) * self.lora_b(lora_out)

Expand Down
32 changes: 8 additions & 24 deletions examples/models/llama/source_transformation/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,30 +144,14 @@ def quantize( # noqa C901
from torchao.utils import unwrap_tensor_subclass

def filter_fn(m, fqn):
    """Select linear modules eligible for weight quantization.

    Args:
        m: candidate submodule.
        fqn: fully-qualified name of ``m`` within the model.

    Returns:
        True iff ``m`` is an ``nn.Linear`` that is not a LoRA adapter
        projection and whose input dimension is compatible with
        ``group_size``.
    """
    if not isinstance(m, nn.Linear):
        return False
    # Skip the LoRA adapter matrices; only the base projection
    # (e.g. ``*.linear``) should be quantized.
    parts = fqn.split(".")
    if "lora_a" in parts or "lora_b" in parts:
        return False
    # A group_size of 0 disables the divisibility requirement.
    if group_size == 0:
        return True
    # Group-wise quantization needs in_features divisible by the group.
    return m.weight.shape[1] % group_size == 0
Comment on lines 146 to +154
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new filter_fn behavior for 8da4w/8da8w quantization relies on module FQNs to skip LoRA adapter layers (lora_a/lora_b) while still quantizing the base projection (*.linear). This is subtle and currently untested; please add a regression test that runs this quantization path on a small model with LoRALinear and asserts that lora_a/lora_b weights remain unquantized while the base linear weight is quantized (and group_size filtering behaves as expected).

Copilot uses AI. Check for mistakes.

weight_dtype = torch.int4 if qmode == "8da4w" else torch.int8
quantize_(
Expand Down
Loading