diff --git a/quant/__init__.py b/quant/__init__.py index cd639a4..6445278 100644 --- a/quant/__init__.py +++ b/quant/__init__.py @@ -2,3 +2,4 @@ from .fused_attn import QuantLlamaAttention, make_quant_attn from .fused_mlp import QuantLlamaMLP, make_fused_mlp, autotune_warmup_fused from .quant_linear import QuantLinear, make_quant_linear, autotune_warmup_linear +from .triton_norm import TritonLlamaRMSNorm, make_quant_norm