diff --git a/examples/models/llama/source_transformation/apply_spin_quant_r1_r2.py b/examples/models/llama/source_transformation/apply_spin_quant_r1_r2.py
index 7ec35c7b6c9..89f564935fc 100644
--- a/examples/models/llama/source_transformation/apply_spin_quant_r1_r2.py
+++ b/examples/models/llama/source_transformation/apply_spin_quant_r1_r2.py
@@ -146,9 +146,9 @@ def fuse_ln_linear(
                     torch.zeros(linear.out_features, dtype=torch.float32)
                 )
             linear.bias.data = linear.bias.data.to(dtype=torch.float32) + torch.matmul(
+                W_,
                 # pyre-fixme[6]: For 2nd argument expected `Tensor` but got
                 #  `Union[Tensor, Module]`.
-                W_,
                 layernorm.bias.to(dtype=torch.float32),
             )
             linear.bias.data = linear.bias.data.to(linear_dtype)
diff --git a/exir/emit/_emitter.py b/exir/emit/_emitter.py
index a1dcc23dcee..381bab618cd 100644
--- a/exir/emit/_emitter.py
+++ b/exir/emit/_emitter.py
@@ -1634,8 +1634,8 @@ def plan(self) -> ExecutionPlan:
             # missing in scenarios like unit test that does not enable memory planning, assume an
             # empty list.
             non_const_buffer_sizes=typing.cast(
-                # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorB...
                 List[int],
+                # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorB...
                 self.module.meta["non_const_buffer_sizes"],
             ),
             container_meta_type=self.container_meta_type,