Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/xnnpack/operators/node_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def get_serialized_buffer_index(
f"Serializing constant data node {tensor} but tensor value has no bytes",
)
sha256_hash = hashlib.sha256(bytes(array))
named_key = tensor.name + "_" + sha256_hash.hexdigest()
named_key = sha256_hash.hexdigest()
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing named_key to only the SHA256 digest makes keys content-addressed and no longer namespaced per tensor. When exporting multiple PTD/PTE named data maps that are loaded together (e.g., foundation + LoRA program-data separation), MergedDataMap::load rejects duplicate keys across maps even if the underlying bytes are identical, so identical constants (common for zero/one-initialized tensors) can now cause runtime load failures. Consider incorporating a stable per-parameter identifier into the key (e.g., get_attr_node.target/FQN) or otherwise namespacing by component while keeping determinism across exports.

Suggested change
named_key = sha256_hash.hexdigest()
# Use a per-tensor namespace in the key to avoid collisions across
# different PTD/PTE maps that may contain identical constant bytes.
tensor_name = getattr(tensor, "name", None)
if tensor_name is not None:
named_key = f"{tensor_name}:{sha256_hash.hexdigest()}"
else:
named_key = sha256_hash.hexdigest()

Copilot uses AI. Check for mistakes.

size = const_val.untyped_storage().nbytes()
xnn_graph.constant_data.append(
Expand Down
29 changes: 19 additions & 10 deletions examples/models/llama/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,31 @@ def __init__(
self.rank = rank
self.alpha = alpha
self.use_bias = use_bias
self.dropout = dropout

linear = nn.Linear(in_dim, out_dim, bias=use_bias)
weight = linear.weight
bias = linear.bias if self.use_bias else None
self.register_parameter("weight", nn.Parameter(weight))
self.register_parameter(
"bias", nn.Parameter(bias) if bias is not None else None
)

self.linear = nn.Linear(in_dim, out_dim, bias=use_bias)
self.dropout = nn.Dropout(p=dropout) if dropout > 0.0 else nn.Identity()
self.lora_a = nn.Linear(in_features=in_dim, out_features=rank, bias=False)
self.lora_b = nn.Linear(in_features=rank, out_features=out_dim, bias=False)

@property
def weight(self):
    """Weight tensor of the wrapped base linear projection.

    Kept as a read-only property so existing callers that accessed
    ``weight`` directly on this module keep working after the parameter
    moved onto the ``linear`` submodule.
    """
    base = self.linear
    return base.weight

@property
def bias(self):
    """Bias tensor of the wrapped base linear projection (or None).

    Mirrors the ``weight`` property: preserves the old attribute surface
    now that the parameter lives on the ``linear`` submodule.
    """
    base = self.linear
    return base.bias

def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
    """Load parameters, remapping legacy checkpoint keys.

    Older checkpoints stored the base projection parameters as
    ``<prefix>weight`` / ``<prefix>bias``; the module now holds them on a
    submodule, so the current keys are ``<prefix>linear.weight`` /
    ``<prefix>linear.bias``. Rename any legacy key in place — without
    clobbering an already-present new-style key — before delegating to
    the default loader.
    """
    for param_name in ("weight", "bias"):
        legacy_key = prefix + param_name
        current_key = prefix + "linear." + param_name
        if legacy_key in state_dict and current_key not in state_dict:
            state_dict[current_key] = state_dict.pop(legacy_key)
    super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)
Comment on lines +43 to +50
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LoRALinear now changes its state_dict surface from weight/bias to linear.weight/linear.bias and adds a compatibility remap in _load_from_state_dict. This is a behavior that’s easy to regress (especially for loading older checkpoints through the full model), but there’s no test covering the backward-compat load path. Please add a unit test that constructs an old-format state_dict (with ...weight/...bias) and verifies it loads into the updated module and produces identical outputs/parameters.

Copilot uses AI. Check for mistakes.

def forward(self, x: torch.Tensor) -> torch.Tensor:
out = torch.nn.functional.linear(x, self.weight, self.bias)
out = self.linear(x)
lora_out = self.lora_a(self.dropout(x))
lora_out = (self.alpha / self.rank) * self.lora_b(lora_out)

Expand Down
32 changes: 8 additions & 24 deletions examples/models/llama/source_transformation/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,30 +144,14 @@ def quantize( # noqa C901
from torchao.utils import unwrap_tensor_subclass

def filter_fn(m, fqn):
    """Select linear modules eligible for weight quantization.

    Args:
        m: candidate submodule.
        fqn: fully-qualified name of ``m`` within the model.

    Returns:
        True iff ``m`` is an ``nn.Linear`` that is not a LoRA adapter
        projection and whose input dimension is compatible with
        ``group_size``.
    """
    if not isinstance(m, nn.Linear):
        return False
    # Skip the LoRA adapter matrices; only the base projection
    # (e.g. ``*.linear``) should be quantized.
    parts = fqn.split(".")
    if "lora_a" in parts or "lora_b" in parts:
        return False
    # A group_size of 0 disables the divisibility requirement.
    if group_size == 0:
        return True
    # Group-wise quantization needs in_features divisible by the group.
    return m.weight.shape[1] % group_size == 0
Comment on lines 146 to +154
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new filter_fn behavior for 8da4w/8da8w quantization relies on module FQNs to skip LoRA adapter layers (lora_a/lora_b) while still quantizing the base projection (*.linear). This is subtle and currently untested; please add a regression test that runs this quantization path on a small model with LoRALinear and asserts that lora_a/lora_b weights remain unquantized while the base linear weight is quantized (and group_size filtering behaves as expected).

Copilot uses AI. Check for mistakes.

weight_dtype = torch.int4 if qmode == "8da4w" else torch.int8
quantize_(
Expand Down
Loading