From 0875585f90bcba904e9e77fce2c7bd995b659dd7 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 19 Sep 2025 16:50:37 +0800 Subject: [PATCH 1/2] [LoongArch][NFC] Add tests for element extraction from binary add operation --- .../CodeGen/LoongArch/lasx/extract-binop.ll | 113 ++++++++++++++++++ .../CodeGen/LoongArch/lsx/extract-binop.ll | 113 ++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll diff --git a/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll b/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll new file mode 100644 index 0000000000000..1517e11aa7d7a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define i8 @extractelt_add_v32i8(ptr %p) { +; CHECK-LABEL: extractelt_add_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 13 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <32 x i8>, ptr %p + %add = add <32 x i8> %x, + %ext = extractelement <32 x i8> %add, i32 2 + ret i8 %ext +} + +define i16 @extractelt_add_v16i16(ptr %p) { +; CHECK-LABEL: extractelt_add_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 13 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <16 x i16>, ptr %p + %add = add <16 x i16> %x, + %ext = extractelement <16 x i16> %add, i32 2 + ret i16 %ext +} + +define i32 @extractelt_add_v8i32(ptr %p) { +; CHECK-LABEL: extractelt_add_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 13 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <8 x i32>, ptr %p + %add = add <8 x i32> %x, + %ext = extractelement <8 x i32> %add, i32 2 + ret i32 %ext +} + +define i64 @extractelt_add_v4i64(ptr %p) { +; LA32-LABEL: extractelt_add_v4i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvaddi.du $xr0, $xr0, 12 +; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_add_v4i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvaddi.du $xr0, $xr0, 12 +; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; LA64-NEXT: ret +entry: + %x = load <4 x i64>, ptr %p + %add = add <4 x i64> %x, + %ext = extractelement <4 x i64> %add, i32 1 + ret i64 %ext +} + +define float @extractelt_fadd_v8f32(ptr %p) { +; CHECK-LABEL: extractelt_fadd_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: lu12i.w $a0, 267520 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0 +; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; CHECK-NEXT: ret +entry: + %x = load <8 x float>, ptr %p + %add = fadd <8 x float> %x, + %ext = extractelement <8 x float> %add, i32 2 + ret float %ext +} + +define double @extractelt_fadd_v4f64(ptr %p) { +; LA32-LABEL: extractelt_fadd_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0) +; LA32-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_fadd_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: ori $a0, $zero, 0 +; LA64-NEXT: lu32i.d $a0, -524288 +; LA64-NEXT: lu52i.d $a0, $a0, 1026 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a0 +; LA64-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 +; LA64-NEXT: ret +entry: + %x = load <4 x double>, ptr %p + %add = fadd <4 x double> %x, + %ext = extractelement <4 x double> %add, i32 1 + ret double %ext +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll b/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll new file mode 100644 index 0000000000000..506bdf0abcc97 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define i8 @extractelt_add_v16i8(ptr %p) { +; CHECK-LABEL: extractelt_add_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vaddi.bu $vr0, $vr0, 13 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <16 x i8>, ptr %p + %add = add <16 x i8> %x, + %ext = extractelement <16 x i8> %add, i32 2 + ret i8 %ext +} + +define i16 @extractelt_add_v8i16(ptr %p) { +; CHECK-LABEL: extractelt_add_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vaddi.hu $vr0, $vr0, 13 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <8 x i16>, ptr %p + %add = add <8 x i16> %x, + %ext = extractelement <8 x i16> %add, i32 2 + ret i16 %ext +} + +define i32 @extractelt_add_v4i32(ptr %p) { +; CHECK-LABEL: extractelt_add_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vaddi.wu $vr0, $vr0, 13 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2 +; CHECK-NEXT: ret +entry: + %x = load <4 x i32>, ptr %p + %add = add <4 x i32> %x, + %ext = extractelement <4 x i32> %add, i32 2 + ret i32 %ext +} + +define i64 @extractelt_add_v2i64(ptr %p) { +; LA32-LABEL: extractelt_add_v2i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: vaddi.du $vr0, $vr0, 12 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 +; LA32-NEXT: vpickve2gr.w $a1, $vr0, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_add_v2i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vaddi.du $vr0, $vr0, 12 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 +; LA64-NEXT: ret +entry: + %x = load <2 x i64>, ptr %p + %add = add <2 x i64> %x, + %ext = extractelement <2 x i64> %add, i32 1 + ret i64 %ext +} + +define float @extractelt_fadd_v4f32(ptr %p) { +; CHECK-LABEL: extractelt_fadd_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: lu12i.w $a0, 267520 +; CHECK-NEXT: vreplgr2vr.w $vr1, $a0 +; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: ret +entry: + %x = load <4 x float>, ptr %p + %add = fadd <4 x float> %x, + %ext = extractelement <4 x float> %add, i32 2 + ret float %ext +} + +define double @extractelt_fadd_v2f64(ptr %p) { +; LA32-LABEL: extractelt_fadd_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a0, 0 +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0) +; LA32-NEXT: vfadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_fadd_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: ori $a0, $zero, 0 +; LA64-NEXT: lu32i.d $a0, -524288 +; LA64-NEXT: lu52i.d $a0, $a0, 1026 +; LA64-NEXT: vreplgr2vr.d $vr1, $a0 +; LA64-NEXT: vfadd.d $vr0, $vr0, $vr1 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA64-NEXT: ret +entry: + %x = load <2 x double>, ptr %p + %add = fadd <2 x double> %x, + %ext = extractelement <2 x double> %add, i32 1 + ret double %ext +} From 058b3f901ddb9107d56d6fbb48d7f7b08d4f5562 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 19 Sep 2025 16:56:53 +0800 Subject: [PATCH 2/2] [LoongArch] Override shouldScalarizeBinop hook to enable `extract(binop)->binop(extract)` combination --- .../LoongArch/LoongArchISelLowering.cpp | 19 ++++++ .../Target/LoongArch/LoongArchISelLowering.h | 2 + .../CodeGen/LoongArch/lasx/extract-binop.ll | 59 ++++++++----------- .../CodeGen/LoongArch/lsx/extract-binop.ll | 59 ++++++++----------- 4 files changed, 67 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index add6bec686c71..a471001086b68 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -9105,3 +9105,22 @@ bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode( return TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } + +bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { + unsigned Opc = VecOp.getOpcode(); + + // Assume target opcodes can't be scalarized. + // TODO - do we have any exceptions? + if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc)) + return false; + + // If the vector op is not supported, try to convert to scalar. + EVT VecVT = VecOp.getValueType(); + if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) + return true; + + // If the vector op is supported, but the scalar op is not, the transform may + // not be worthwhile. + EVT ScalarVT = VecVT.getScalarType(); + return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 9d14934a9d363..8da492570146e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering { TargetLoweringOpt &TLO, unsigned Depth) const override; + bool shouldScalarizeBinop(SDValue VecOp) const override; + private: /// Target-specific function used to lower LoongArch calling conventions. typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, diff --git a/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll b/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll index 1517e11aa7d7a..4986b12199c31 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/extract-binop.ll @@ -31,12 +31,18 @@ entry: } define i32 @extractelt_add_v8i32(ptr %p) { -; CHECK-LABEL: extractelt_add_v8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 13 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 -; CHECK-NEXT: ret +; LA32-LABEL: extractelt_add_v8i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a0, $a0, 8 +; LA32-NEXT: addi.w $a0, $a0, 13 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_add_v8i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvaddi.wu $xr0, $xr0, 13 +; LA64-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; LA64-NEXT: ret entry: %x = load <8 x i32>, ptr %p %add = add <8 x i32> %x, @@ -55,9 +61,8 @@ define i64 @extractelt_add_v4i64(ptr %p) { ; ; LA64-LABEL: extractelt_add_v4i64: ; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a0, 0 -; LA64-NEXT: xvaddi.du $xr0, $xr0, 12 -; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; LA64-NEXT: ld.d $a0, $a0, 8 +; LA64-NEXT: addi.d $a0, $a0, 12 ; LA64-NEXT: ret entry: %x = load <4 x i64>, ptr %p @@ -69,12 +74,9 @@ entry: define float @extractelt_fadd_v8f32(ptr %p) { ; CHECK-LABEL: extractelt_fadd_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: lu12i.w $a0, 267520 -; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0 -; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0 +; CHECK-NEXT: fld.s $fa0, $a0, 8 +; CHECK-NEXT: vldi $vr1, -1238 +; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %x = load <8 x float>, ptr %p @@ -84,27 +86,12 @@ entry: } define double @extractelt_fadd_v4f64(ptr %p) { -; LA32-LABEL: extractelt_fadd_v4f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a0, 0 -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA32-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0) -; LA32-NEXT: xvfadd.d $xr0, $xr0, $xr1 -; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: ret -; -; LA64-LABEL: extractelt_fadd_v4f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a0, 0 -; LA64-NEXT: ori $a0, $zero, 0 -; LA64-NEXT: lu32i.d $a0, -524288 -; LA64-NEXT: lu52i.d $a0, $a0, 1026 -; LA64-NEXT: xvreplgr2vr.d $xr1, $a0 -; LA64-NEXT: xvfadd.d $xr0, $xr0, $xr1 -; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: ret +; CHECK-LABEL: extractelt_fadd_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fld.d $fa0, $a0, 8 +; CHECK-NEXT: vldi $vr1, -984 +; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1 +; CHECK-NEXT: ret entry: %x = load <4 x double>, ptr %p %add = fadd <4 x double> %x, diff --git a/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll b/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll index 506bdf0abcc97..e8ddf84de6dff 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/extract-binop.ll @@ -31,12 +31,18 @@ entry: } define i32 @extractelt_add_v4i32(ptr %p) { -; CHECK-LABEL: extractelt_add_v4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vaddi.wu $vr0, $vr0, 13 -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2 -; CHECK-NEXT: ret +; LA32-LABEL: extractelt_add_v4i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a0, $a0, 8 +; LA32-NEXT: addi.w $a0, $a0, 13 +; LA32-NEXT: ret +; +; LA64-LABEL: extractelt_add_v4i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vaddi.wu $vr0, $vr0, 13 +; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2 +; LA64-NEXT: ret entry: %x = load <4 x i32>, ptr %p %add = add <4 x i32> %x, @@ -55,9 +61,8 @@ define i64 @extractelt_add_v2i64(ptr %p) { ; ; LA64-LABEL: extractelt_add_v2i64: ; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a0, 0 -; LA64-NEXT: vaddi.du $vr0, $vr0, 12 -; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 +; LA64-NEXT: ld.d $a0, $a0, 8 +; LA64-NEXT: addi.d $a0, $a0, 12 ; LA64-NEXT: ret entry: %x = load <2 x i64>, ptr %p @@ -69,12 +74,9 @@ entry: define float @extractelt_fadd_v4f32(ptr %p) { ; CHECK-LABEL: extractelt_fadd_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: lu12i.w $a0, 267520 -; CHECK-NEXT: vreplgr2vr.w $vr1, $a0 -; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: fld.s $fa0, $a0, 8 +; CHECK-NEXT: vldi $vr1, -1238 +; CHECK-NEXT: fadd.s $fa0, $fa0, $fa1 ; CHECK-NEXT: ret entry: %x = load <4 x float>, ptr %p @@ -84,27 +86,12 @@ entry: } define double @extractelt_fadd_v2f64(ptr %p) { -; LA32-LABEL: extractelt_fadd_v2f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: vld $vr0, $a0, 0 -; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0) -; LA32-NEXT: vfadd.d $vr0, $vr0, $vr1 -; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT: ret -; -; LA64-LABEL: extractelt_fadd_v2f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: vld $vr0, $a0, 0 -; LA64-NEXT: ori $a0, $zero, 0 -; LA64-NEXT: lu32i.d $a0, -524288 -; LA64-NEXT: lu52i.d $a0, $a0, 1026 -; LA64-NEXT: vreplgr2vr.d $vr1, $a0 -; LA64-NEXT: vfadd.d $vr0, $vr0, $vr1 -; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT: ret +; CHECK-LABEL: extractelt_fadd_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fld.d $fa0, $a0, 8 +; CHECK-NEXT: vldi $vr1, -984 +; CHECK-NEXT: fadd.d $fa0, $fa0, $fa1 +; CHECK-NEXT: ret entry: %x = load <2 x double>, ptr %p %add = fadd <2 x double> %x,