diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll index c1bbb98a491ad8..9ddb91f0770d9d 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1,21 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mips -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-BE,O32-BE %s +; RUN: | FileCheck -check-prefixes=O32,O32-BE %s ; RUN: llc -march=mipsel -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,O32,MIPS32,ALL-LE,O32-LE %s +; RUN: | FileCheck -check-prefixes=O32,O32-LE %s ; RUN: llc -march=mips64 -mcpu=mips64r5 -target-abi n32 -mattr=+msa,+fp64 \ ; RUN: -relocation-model=pic -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,N32,MIPS64,ALL-BE %s +; RUN: | FileCheck -check-prefixes=N32,N32-BE %s ; RUN: llc -march=mips64el -mcpu=mips64r5 -target-abi n32 -mattr=+msa,+fp64 \ ; RUN: -relocation-model=pic -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,N32,MIPS64,ALL-LE %s +; RUN: | FileCheck -check-prefixes=N32,N32-LE %s ; RUN: llc -march=mips64 -mcpu=mips64r5 -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,N64,MIPS64,ALL-BE %s +; RUN: | FileCheck -check-prefixes=N64,N64-BE %s ; RUN: llc -march=mips64el -mcpu=mips64r5 -mattr=+msa,+fp64 -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=ALL,N64,MIPS64,ALL-LE %s +; RUN: | FileCheck -check-prefixes=N64,N64-LE %s @v4i8 = global <4 x i8> @v16i8 = global <16 x i8> @@ -26,185 +27,727 @@ @i64 = global i64 0 define void @const_v16i8() nounwind { - ; ALL-LABEL: const_v16i8: - +; O32-BE-LABEL: const_v16i8: +; O32-BE: # %bb.0: +; 
O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: lw $2, %got(v16i8)($1) +; O32-BE-NEXT: ldi.b $w0, 0 +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: ldi.b $w0, 1 +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: lw $3, %got($CPI0_0)($1) +; O32-BE-NEXT: addiu $3, $3, %lo($CPI0_0) +; O32-BE-NEXT: ld.b $w0, 0($3) +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: lw $1, %got($CPI0_1)($1) +; O32-BE-NEXT: addiu $1, $1, %lo($CPI0_1) +; O32-BE-NEXT: ld.b $w0, 0($1) +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: ldi.h $w0, 256 +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: lui $1, 258 +; O32-BE-NEXT: ori $1, $1, 772 +; O32-BE-NEXT: fill.w $w0, $1 +; O32-BE-NEXT: st.b $w0, 0($2) +; O32-BE-NEXT: lui $3, 1286 +; O32-BE-NEXT: ori $3, $3, 1800 +; O32-BE-NEXT: fill.w $w0, $3 +; O32-BE-NEXT: insert.w $w0[1], $1 +; O32-BE-NEXT: splati.d $w0, $w0[0] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: st.b $w0, 0($2) +; +; O32-LE-LABEL: const_v16i8: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: lw $2, %got(v16i8)($1) +; O32-LE-NEXT: ldi.b $w0, 0 +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: ldi.b $w0, 1 +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: lw $3, %got($CPI0_0)($1) +; O32-LE-NEXT: addiu $3, $3, %lo($CPI0_0) +; O32-LE-NEXT: ld.b $w0, 0($3) +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: lw $1, %got($CPI0_1)($1) +; O32-LE-NEXT: addiu $1, $1, %lo($CPI0_1) +; O32-LE-NEXT: ld.b $w0, 0($1) +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: ldi.h $w0, 1 +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: lui $1, 1027 +; O32-LE-NEXT: ori $1, $1, 513 +; O32-LE-NEXT: fill.w $w0, $1 +; O32-LE-NEXT: st.b $w0, 0($2) +; O32-LE-NEXT: lui $1, 2055 +; O32-LE-NEXT: ori $1, $1, 1541 +; O32-LE-NEXT: insert.w $w0[1], $1 +; O32-LE-NEXT: splati.d $w0, $w0[0] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: st.b $w0, 0($2) +; +; N32-BE-LABEL: 
const_v16i8: +; N32-BE: # %bb.0: +; N32-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v16i8))) +; N32-BE-NEXT: addu $1, $1, $25 +; N32-BE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v16i8))) +; N32-BE-NEXT: lw $2, %got_disp(v16i8)($1) +; N32-BE-NEXT: ldi.b $w0, 0 +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: ldi.b $w0, 1 +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: lw $3, %got_page(.LCPI0_0)($1) +; N32-BE-NEXT: addiu $3, $3, %got_ofst(.LCPI0_0) +; N32-BE-NEXT: ld.b $w0, 0($3) +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: lw $1, %got_page(.LCPI0_1)($1) +; N32-BE-NEXT: addiu $1, $1, %got_ofst(.LCPI0_1) +; N32-BE-NEXT: ld.b $w0, 0($1) +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: ldi.h $w0, 256 +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: lui $1, 258 +; N32-BE-NEXT: ori $1, $1, 772 +; N32-BE-NEXT: fill.w $w0, $1 +; N32-BE-NEXT: st.b $w0, 0($2) +; N32-BE-NEXT: lui $3, 1286 +; N32-BE-NEXT: ori $3, $3, 1800 +; N32-BE-NEXT: dinsu $3, $1, 32, 32 +; N32-BE-NEXT: fill.d $w0, $3 +; N32-BE-NEXT: jr $ra +; N32-BE-NEXT: st.b $w0, 0($2) +; +; N32-LE-LABEL: const_v16i8: +; N32-LE: # %bb.0: +; N32-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v16i8))) +; N32-LE-NEXT: addu $1, $1, $25 +; N32-LE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v16i8))) +; N32-LE-NEXT: lw $2, %got_disp(v16i8)($1) +; N32-LE-NEXT: ldi.b $w0, 0 +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: ldi.b $w0, 1 +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: lw $3, %got_page(.LCPI0_0)($1) +; N32-LE-NEXT: addiu $3, $3, %got_ofst(.LCPI0_0) +; N32-LE-NEXT: ld.b $w0, 0($3) +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: lw $1, %got_page(.LCPI0_1)($1) +; N32-LE-NEXT: addiu $1, $1, %got_ofst(.LCPI0_1) +; N32-LE-NEXT: ld.b $w0, 0($1) +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: ldi.h $w0, 1 +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: lui $1, 1027 +; N32-LE-NEXT: ori $1, $1, 513 +; N32-LE-NEXT: fill.w $w0, $1 +; N32-LE-NEXT: st.b $w0, 0($2) +; N32-LE-NEXT: lui $3, 2055 +; N32-LE-NEXT: ori $3, $3, 1541 +; N32-LE-NEXT: 
dinsu $1, $3, 32, 32 +; N32-LE-NEXT: fill.d $w0, $1 +; N32-LE-NEXT: jr $ra +; N32-LE-NEXT: st.b $w0, 0($2) +; +; N64-BE-LABEL: const_v16i8: +; N64-BE: # %bb.0: +; N64-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v16i8))) +; N64-BE-NEXT: daddu $1, $1, $25 +; N64-BE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v16i8))) +; N64-BE-NEXT: ld $2, %got_disp(v16i8)($1) +; N64-BE-NEXT: ldi.b $w0, 0 +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: ldi.b $w0, 1 +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: ld $3, %got_page(.LCPI0_0)($1) +; N64-BE-NEXT: daddiu $3, $3, %got_ofst(.LCPI0_0) +; N64-BE-NEXT: ld.b $w0, 0($3) +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: ld $1, %got_page(.LCPI0_1)($1) +; N64-BE-NEXT: daddiu $1, $1, %got_ofst(.LCPI0_1) +; N64-BE-NEXT: ld.b $w0, 0($1) +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: ldi.h $w0, 256 +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: lui $1, 258 +; N64-BE-NEXT: ori $1, $1, 772 +; N64-BE-NEXT: fill.w $w0, $1 +; N64-BE-NEXT: st.b $w0, 0($2) +; N64-BE-NEXT: lui $3, 1286 +; N64-BE-NEXT: ori $3, $3, 1800 +; N64-BE-NEXT: dinsu $3, $1, 32, 32 +; N64-BE-NEXT: fill.d $w0, $3 +; N64-BE-NEXT: jr $ra +; N64-BE-NEXT: st.b $w0, 0($2) +; +; N64-LE-LABEL: const_v16i8: +; N64-LE: # %bb.0: +; N64-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v16i8))) +; N64-LE-NEXT: daddu $1, $1, $25 +; N64-LE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v16i8))) +; N64-LE-NEXT: ld $2, %got_disp(v16i8)($1) +; N64-LE-NEXT: ldi.b $w0, 0 +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: ldi.b $w0, 1 +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: ld $3, %got_page(.LCPI0_0)($1) +; N64-LE-NEXT: daddiu $3, $3, %got_ofst(.LCPI0_0) +; N64-LE-NEXT: ld.b $w0, 0($3) +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: ld $1, %got_page(.LCPI0_1)($1) +; N64-LE-NEXT: daddiu $1, $1, %got_ofst(.LCPI0_1) +; N64-LE-NEXT: ld.b $w0, 0($1) +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: ldi.h $w0, 1 +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: lui $1, 1027 +; N64-LE-NEXT: ori $1, $1, 513 +; 
N64-LE-NEXT: fill.w $w0, $1 +; N64-LE-NEXT: st.b $w0, 0($2) +; N64-LE-NEXT: lui $3, 2055 +; N64-LE-NEXT: ori $3, $3, 1541 +; N64-LE-NEXT: dinsu $1, $3, 32, 32 +; N64-LE-NEXT: fill.d $w0, $1 +; N64-LE-NEXT: jr $ra +; N64-LE-NEXT: st.b $w0, 0($2) store volatile <16 x i8> , <16 x i8>*@v16i8 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 0 - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 1 - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]]) - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; ALL-BE: ldi.h [[R1:\$w[0-9]+]], 256 - ; ALL-LE: ldi.h [[R1:\$w[0-9]+]], 1 - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 258 - ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 1027 - ; ALL-BE-DAG: ori [[R2]], [[R2]], 772 - ; ALL-LE-DAG: ori [[R2]], [[R2]], 513 - ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] - store volatile <16 x i8> , <16 x i8>*@v16i8 - ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 1286 - ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 2055 - ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1800 - ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 1541 - ; O32-BE: fill.w [[R1:\$w[0-9]+]], [[R4]] - - ; O32: insert.w [[R1]][1], [[R2]] - ; O32: splati.d $w{{.*}}, [[R1]][0] - - ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32 - ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32 - ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] - ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] - ret void } define void @const_v8i16() nounwind { - ; ALL-LABEL: const_v8i16: - +; O32-BE-LABEL: const_v8i16: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu 
$2, $2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: ldi.b $w0, 0 +; O32-BE-NEXT: lw $2, %got(v8i16)($1) +; O32-BE-NEXT: st.h $w0, 0($2) +; O32-BE-NEXT: ldi.h $w0, 1 +; O32-BE-NEXT: st.h $w0, 0($2) +; O32-BE-NEXT: lw $1, %got($CPI1_0)($1) +; O32-BE-NEXT: addiu $1, $1, %lo($CPI1_0) +; O32-BE-NEXT: ld.h $w0, 0($1) +; O32-BE-NEXT: st.h $w0, 0($2) +; O32-BE-NEXT: ldi.b $w0, 4 +; O32-BE-NEXT: st.h $w0, 0($2) +; O32-BE-NEXT: lui $1, 1 +; O32-BE-NEXT: ori $1, $1, 2 +; O32-BE-NEXT: fill.w $w0, $1 +; O32-BE-NEXT: st.h $w0, 0($2) +; O32-BE-NEXT: lui $3, 3 +; O32-BE-NEXT: ori $3, $3, 4 +; O32-BE-NEXT: fill.w $w0, $3 +; O32-BE-NEXT: insert.w $w0[1], $1 +; O32-BE-NEXT: splati.d $w0, $w0[0] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: st.h $w0, 0($2) +; +; O32-LE-LABEL: const_v8i16: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: ldi.b $w0, 0 +; O32-LE-NEXT: lw $2, %got(v8i16)($1) +; O32-LE-NEXT: st.h $w0, 0($2) +; O32-LE-NEXT: ldi.h $w0, 1 +; O32-LE-NEXT: st.h $w0, 0($2) +; O32-LE-NEXT: lw $1, %got($CPI1_0)($1) +; O32-LE-NEXT: addiu $1, $1, %lo($CPI1_0) +; O32-LE-NEXT: ld.h $w0, 0($1) +; O32-LE-NEXT: st.h $w0, 0($2) +; O32-LE-NEXT: ldi.b $w0, 4 +; O32-LE-NEXT: st.h $w0, 0($2) +; O32-LE-NEXT: lui $1, 2 +; O32-LE-NEXT: ori $1, $1, 1 +; O32-LE-NEXT: fill.w $w0, $1 +; O32-LE-NEXT: st.h $w0, 0($2) +; O32-LE-NEXT: lui $1, 4 +; O32-LE-NEXT: ori $1, $1, 3 +; O32-LE-NEXT: insert.w $w0[1], $1 +; O32-LE-NEXT: splati.d $w0, $w0[0] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: st.h $w0, 0($2) +; +; N32-BE-LABEL: const_v8i16: +; N32-BE: # %bb.0: +; N32-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v8i16))) +; N32-BE-NEXT: addu $1, $1, $25 +; N32-BE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v8i16))) +; N32-BE-NEXT: ldi.b $w0, 0 +; N32-BE-NEXT: lw $2, %got_disp(v8i16)($1) +; N32-BE-NEXT: st.h $w0, 0($2) +; N32-BE-NEXT: ldi.h $w0, 1 +; N32-BE-NEXT: st.h $w0, 0($2) +; N32-BE-NEXT: lw $1, 
%got_page(.LCPI1_0)($1) +; N32-BE-NEXT: addiu $1, $1, %got_ofst(.LCPI1_0) +; N32-BE-NEXT: ld.h $w0, 0($1) +; N32-BE-NEXT: st.h $w0, 0($2) +; N32-BE-NEXT: ldi.b $w0, 4 +; N32-BE-NEXT: st.h $w0, 0($2) +; N32-BE-NEXT: lui $1, 1 +; N32-BE-NEXT: ori $1, $1, 2 +; N32-BE-NEXT: fill.w $w0, $1 +; N32-BE-NEXT: st.h $w0, 0($2) +; N32-BE-NEXT: lui $3, 3 +; N32-BE-NEXT: ori $3, $3, 4 +; N32-BE-NEXT: dinsu $3, $1, 32, 32 +; N32-BE-NEXT: fill.d $w0, $3 +; N32-BE-NEXT: jr $ra +; N32-BE-NEXT: st.h $w0, 0($2) +; +; N32-LE-LABEL: const_v8i16: +; N32-LE: # %bb.0: +; N32-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v8i16))) +; N32-LE-NEXT: addu $1, $1, $25 +; N32-LE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v8i16))) +; N32-LE-NEXT: ldi.b $w0, 0 +; N32-LE-NEXT: lw $2, %got_disp(v8i16)($1) +; N32-LE-NEXT: st.h $w0, 0($2) +; N32-LE-NEXT: ldi.h $w0, 1 +; N32-LE-NEXT: st.h $w0, 0($2) +; N32-LE-NEXT: lw $1, %got_page(.LCPI1_0)($1) +; N32-LE-NEXT: addiu $1, $1, %got_ofst(.LCPI1_0) +; N32-LE-NEXT: ld.h $w0, 0($1) +; N32-LE-NEXT: st.h $w0, 0($2) +; N32-LE-NEXT: ldi.b $w0, 4 +; N32-LE-NEXT: st.h $w0, 0($2) +; N32-LE-NEXT: lui $1, 2 +; N32-LE-NEXT: ori $1, $1, 1 +; N32-LE-NEXT: fill.w $w0, $1 +; N32-LE-NEXT: st.h $w0, 0($2) +; N32-LE-NEXT: lui $3, 4 +; N32-LE-NEXT: ori $3, $3, 3 +; N32-LE-NEXT: dinsu $1, $3, 32, 32 +; N32-LE-NEXT: fill.d $w0, $1 +; N32-LE-NEXT: jr $ra +; N32-LE-NEXT: st.h $w0, 0($2) +; +; N64-BE-LABEL: const_v8i16: +; N64-BE: # %bb.0: +; N64-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v8i16))) +; N64-BE-NEXT: daddu $1, $1, $25 +; N64-BE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v8i16))) +; N64-BE-NEXT: ldi.b $w0, 0 +; N64-BE-NEXT: ld $2, %got_disp(v8i16)($1) +; N64-BE-NEXT: st.h $w0, 0($2) +; N64-BE-NEXT: ldi.h $w0, 1 +; N64-BE-NEXT: st.h $w0, 0($2) +; N64-BE-NEXT: ld $1, %got_page(.LCPI1_0)($1) +; N64-BE-NEXT: daddiu $1, $1, %got_ofst(.LCPI1_0) +; N64-BE-NEXT: ld.h $w0, 0($1) +; N64-BE-NEXT: st.h $w0, 0($2) +; N64-BE-NEXT: ldi.b $w0, 4 +; N64-BE-NEXT: st.h $w0, 0($2) +; N64-BE-NEXT: 
lui $1, 1 +; N64-BE-NEXT: ori $1, $1, 2 +; N64-BE-NEXT: fill.w $w0, $1 +; N64-BE-NEXT: st.h $w0, 0($2) +; N64-BE-NEXT: lui $3, 3 +; N64-BE-NEXT: ori $3, $3, 4 +; N64-BE-NEXT: dinsu $3, $1, 32, 32 +; N64-BE-NEXT: fill.d $w0, $3 +; N64-BE-NEXT: jr $ra +; N64-BE-NEXT: st.h $w0, 0($2) +; +; N64-LE-LABEL: const_v8i16: +; N64-LE: # %bb.0: +; N64-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v8i16))) +; N64-LE-NEXT: daddu $1, $1, $25 +; N64-LE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v8i16))) +; N64-LE-NEXT: ldi.b $w0, 0 +; N64-LE-NEXT: ld $2, %got_disp(v8i16)($1) +; N64-LE-NEXT: st.h $w0, 0($2) +; N64-LE-NEXT: ldi.h $w0, 1 +; N64-LE-NEXT: st.h $w0, 0($2) +; N64-LE-NEXT: ld $1, %got_page(.LCPI1_0)($1) +; N64-LE-NEXT: daddiu $1, $1, %got_ofst(.LCPI1_0) +; N64-LE-NEXT: ld.h $w0, 0($1) +; N64-LE-NEXT: st.h $w0, 0($2) +; N64-LE-NEXT: ldi.b $w0, 4 +; N64-LE-NEXT: st.h $w0, 0($2) +; N64-LE-NEXT: lui $1, 2 +; N64-LE-NEXT: ori $1, $1, 1 +; N64-LE-NEXT: fill.w $w0, $1 +; N64-LE-NEXT: st.h $w0, 0($2) +; N64-LE-NEXT: lui $3, 4 +; N64-LE-NEXT: ori $3, $3, 3 +; N64-LE-NEXT: dinsu $1, $3, 32, 32 +; N64-LE-NEXT: fill.d $w0, $1 +; N64-LE-NEXT: jr $ra +; N64-LE-NEXT: st.h $w0, 0($2) store volatile <8 x i16> , <8 x i16>*@v8i16 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 0 - store volatile <8 x i16> , <8 x i16>*@v8i16 - ; ALL: ldi.h [[R1:\$w[0-9]+]], 1 - store volatile <8 x i16> , <8 x i16>*@v8i16 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]]) - store volatile <8 x i16> , <8 x i16>*@v8i16 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 4 - store volatile <8 x i16> , <8 x i16>*@v8i16 - ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 1 - ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 2 - ; ALL-BE-DAG: ori [[R2]], [[R2]], 2 - ; ALL-LE-DAG: ori [[R2]], [[R2]], 1 - ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] - store volatile <8 x i16> , <8 x i16>*@v8i16 - ; ALL-BE-DAG: lui [[R3:\$[0-9]+]], 3 
- ; ALL-LE-DAG: lui [[R3:\$[0-9]+]], 4 - ; ALL-BE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 4 - ; ALL-LE-DAG: ori [[R4:\$[0-9]+]], [[R3]], 3 - - ; O32-BE: fill.w [[R1:\$w[0-9]+]], [[R4]] - ; O32: insert.w [[R1]][1], [[R2]] - ; O32: splati.d $w{{.*}}, [[R1]][0] - - ; MIPS64-BE: dinsu [[R4]], [[R2]], 32, 32 - ; MIPS64-LE: dinsu [[R2]], [[R4]], 32, 32 - ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] - ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] - ret void } define void @const_v4i32() nounwind { - ; ALL-LABEL: const_v4i32: - +; O32-BE-LABEL: const_v4i32: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: ldi.b $w0, 0 +; O32-BE-NEXT: lw $2, %got(v4i32)($1) +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: ldi.w $w0, 1 +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: lw $3, %got($CPI2_0)($1) +; O32-BE-NEXT: addiu $3, $3, %lo($CPI2_0) +; O32-BE-NEXT: ld.w $w0, 0($3) +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: ldi.b $w0, 1 +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: ldi.h $w0, 1 +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: ori $3, $zero, 1 +; O32-BE-NEXT: ori $4, $zero, 2 +; O32-BE-NEXT: fill.w $w0, $4 +; O32-BE-NEXT: insert.w $w0[1], $3 +; O32-BE-NEXT: splati.d $w0, $w0[0] +; O32-BE-NEXT: st.w $w0, 0($2) +; O32-BE-NEXT: lw $1, %got($CPI2_1)($1) +; O32-BE-NEXT: addiu $1, $1, %lo($CPI2_1) +; O32-BE-NEXT: ld.w $w0, 0($1) +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: st.w $w0, 0($2) +; +; O32-LE-LABEL: const_v4i32: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: ldi.b $w0, 0 +; O32-LE-NEXT: lw $2, %got(v4i32)($1) +; O32-LE-NEXT: st.w $w0, 0($2) +; O32-LE-NEXT: ldi.w $w0, 1 +; O32-LE-NEXT: st.w $w0, 0($2) +; O32-LE-NEXT: lw $3, %got($CPI2_0)($1) +; O32-LE-NEXT: addiu $3, $3, %lo($CPI2_0) +; O32-LE-NEXT: ld.w $w0, 0($3) +; O32-LE-NEXT: st.w $w0, 0($2) +; O32-LE-NEXT: ldi.b $w0, 1 +; O32-LE-NEXT: st.w 
$w0, 0($2) +; O32-LE-NEXT: ldi.h $w0, 1 +; O32-LE-NEXT: st.w $w0, 0($2) +; O32-LE-NEXT: ori $3, $zero, 2 +; O32-LE-NEXT: ori $4, $zero, 1 +; O32-LE-NEXT: fill.w $w0, $4 +; O32-LE-NEXT: insert.w $w0[1], $3 +; O32-LE-NEXT: splati.d $w0, $w0[0] +; O32-LE-NEXT: st.w $w0, 0($2) +; O32-LE-NEXT: lw $1, %got($CPI2_1)($1) +; O32-LE-NEXT: addiu $1, $1, %lo($CPI2_1) +; O32-LE-NEXT: ld.w $w0, 0($1) +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: st.w $w0, 0($2) +; +; N32-BE-LABEL: const_v4i32: +; N32-BE: # %bb.0: +; N32-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v4i32))) +; N32-BE-NEXT: addu $1, $1, $25 +; N32-BE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v4i32))) +; N32-BE-NEXT: ldi.b $w0, 0 +; N32-BE-NEXT: lw $2, %got_disp(v4i32)($1) +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: ldi.w $w0, 1 +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: lw $3, %got_page(.LCPI2_0)($1) +; N32-BE-NEXT: addiu $3, $3, %got_ofst(.LCPI2_0) +; N32-BE-NEXT: ld.w $w0, 0($3) +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: ldi.b $w0, 1 +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: ldi.h $w0, 1 +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: ori $3, $zero, 2 +; N32-BE-NEXT: ori $4, $zero, 1 +; N32-BE-NEXT: dinsu $3, $4, 32, 32 +; N32-BE-NEXT: fill.d $w0, $3 +; N32-BE-NEXT: st.w $w0, 0($2) +; N32-BE-NEXT: lw $1, %got_page(.LCPI2_1)($1) +; N32-BE-NEXT: addiu $1, $1, %got_ofst(.LCPI2_1) +; N32-BE-NEXT: ld.w $w0, 0($1) +; N32-BE-NEXT: jr $ra +; N32-BE-NEXT: st.w $w0, 0($2) +; +; N32-LE-LABEL: const_v4i32: +; N32-LE: # %bb.0: +; N32-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v4i32))) +; N32-LE-NEXT: addu $1, $1, $25 +; N32-LE-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v4i32))) +; N32-LE-NEXT: ldi.b $w0, 0 +; N32-LE-NEXT: lw $2, %got_disp(v4i32)($1) +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: ldi.w $w0, 1 +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: lw $3, %got_page(.LCPI2_0)($1) +; N32-LE-NEXT: addiu $3, $3, %got_ofst(.LCPI2_0) +; N32-LE-NEXT: ld.w $w0, 0($3) +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: 
ldi.b $w0, 1 +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: ldi.h $w0, 1 +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: ori $3, $zero, 1 +; N32-LE-NEXT: ori $4, $zero, 2 +; N32-LE-NEXT: dinsu $3, $4, 32, 32 +; N32-LE-NEXT: fill.d $w0, $3 +; N32-LE-NEXT: st.w $w0, 0($2) +; N32-LE-NEXT: lw $1, %got_page(.LCPI2_1)($1) +; N32-LE-NEXT: addiu $1, $1, %got_ofst(.LCPI2_1) +; N32-LE-NEXT: ld.w $w0, 0($1) +; N32-LE-NEXT: jr $ra +; N32-LE-NEXT: st.w $w0, 0($2) +; +; N64-BE-LABEL: const_v4i32: +; N64-BE: # %bb.0: +; N64-BE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v4i32))) +; N64-BE-NEXT: daddu $1, $1, $25 +; N64-BE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v4i32))) +; N64-BE-NEXT: ldi.b $w0, 0 +; N64-BE-NEXT: ld $2, %got_disp(v4i32)($1) +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ldi.w $w0, 1 +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ld $3, %got_page(.LCPI2_0)($1) +; N64-BE-NEXT: daddiu $3, $3, %got_ofst(.LCPI2_0) +; N64-BE-NEXT: ld.w $w0, 0($3) +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ldi.b $w0, 1 +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ldi.h $w0, 1 +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ori $3, $zero, 2 +; N64-BE-NEXT: ori $4, $zero, 1 +; N64-BE-NEXT: dinsu $3, $4, 32, 32 +; N64-BE-NEXT: fill.d $w0, $3 +; N64-BE-NEXT: st.w $w0, 0($2) +; N64-BE-NEXT: ld $1, %got_page(.LCPI2_1)($1) +; N64-BE-NEXT: daddiu $1, $1, %got_ofst(.LCPI2_1) +; N64-BE-NEXT: ld.w $w0, 0($1) +; N64-BE-NEXT: jr $ra +; N64-BE-NEXT: st.w $w0, 0($2) +; +; N64-LE-LABEL: const_v4i32: +; N64-LE: # %bb.0: +; N64-LE-NEXT: lui $1, %hi(%neg(%gp_rel(const_v4i32))) +; N64-LE-NEXT: daddu $1, $1, $25 +; N64-LE-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v4i32))) +; N64-LE-NEXT: ldi.b $w0, 0 +; N64-LE-NEXT: ld $2, %got_disp(v4i32)($1) +; N64-LE-NEXT: st.w $w0, 0($2) +; N64-LE-NEXT: ldi.w $w0, 1 +; N64-LE-NEXT: st.w $w0, 0($2) +; N64-LE-NEXT: ld $3, %got_page(.LCPI2_0)($1) +; N64-LE-NEXT: daddiu $3, $3, %got_ofst(.LCPI2_0) +; N64-LE-NEXT: ld.w $w0, 0($3) +; N64-LE-NEXT: st.w $w0, 0($2) +; 
N64-LE-NEXT: ldi.b $w0, 1 +; N64-LE-NEXT: st.w $w0, 0($2) +; N64-LE-NEXT: ldi.h $w0, 1 +; N64-LE-NEXT: st.w $w0, 0($2) +; N64-LE-NEXT: ori $3, $zero, 1 +; N64-LE-NEXT: ori $4, $zero, 2 +; N64-LE-NEXT: dinsu $3, $4, 32, 32 +; N64-LE-NEXT: fill.d $w0, $3 +; N64-LE-NEXT: st.w $w0, 0($2) +; N64-LE-NEXT: ld $1, %got_page(.LCPI2_1)($1) +; N64-LE-NEXT: daddiu $1, $1, %got_ofst(.LCPI2_1) +; N64-LE-NEXT: ld.w $w0, 0($1) +; N64-LE-NEXT: jr $ra +; N64-LE-NEXT: st.w $w0, 0($2) store volatile <4 x i32> , <4 x i32>*@v4i32 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 0 - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; ALL: ldi.w [[R1:\$w[0-9]+]], 1 - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 1 - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; ALL: ldi.h [[R1:\$w[0-9]+]], 1 - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; -BE-DAG: ori [[R2:\$[0-9]+]], $zero, 1 - ; O32-BE-DAG: ori [[R3:\$[0-9]+]], $zero, 1 - ; O32-BE-DAG: ori [[R4:\$[0-9]+]], $zero, 2 - ; O32-LE-DAG: ori [[R3:\$[0-9]+]], $zero, 2 - ; O32-LE-DAG: ori [[R4:\$[0-9]+]], $zero, 1 - ; O32: fill.w [[W0:\$w[0-9]+]], [[R4]] - ; O32: insert.w [[W0]][1], [[R3]] - ; O32: splati.d [[W1:\$w[0-9]+]], [[W0]] - - ; MIPS64-DAG: ori [[R5:\$[0-9]+]], $zero, 2 - ; MIPS64-DAG: ori [[R6:\$[0-9]+]], $zero, 1 - - ; MIPS64-BE: dinsu [[R5]], [[R6]], 32, 32 - ; MIPS64-LE: dinsu [[R6]], [[R5]], 32, 32 - ; MIPS64-BE: fill.d $w{{.*}}, [[R4]] - ; MIPS64-LE: fill.d $w{{.*}}, [[R2]] - - store volatile <4 x i32> , <4 x i32>*@v4i32 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) - ret void } define void @const_v2i64() 
nounwind { - ; ALL-LABEL: const_v2i64: - +; O32-LABEL: const_v2i64: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: ldi.b $w0, 0 +; O32-NEXT: lw $2, %got(v2i64)($1) +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: ldi.b $w0, 1 +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: ldi.h $w0, 1 +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: ldi.w $w0, 1 +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: ldi.d $w0, 1 +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: lw $3, %got($CPI3_0)($1) +; O32-NEXT: addiu $3, $3, %lo($CPI3_0) +; O32-NEXT: ld.w $w0, 0($3) +; O32-NEXT: st.w $w0, 0($2) +; O32-NEXT: lw $1, %got($CPI3_1)($1) +; O32-NEXT: addiu $1, $1, %lo($CPI3_1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($2) +; +; N32-LABEL: const_v2i64: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(const_v2i64))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(const_v2i64))) +; N32-NEXT: ldi.b $w0, 0 +; N32-NEXT: lw $2, %got_disp(v2i64)($1) +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: ldi.b $w0, 1 +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: ldi.h $w0, 1 +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: ldi.w $w0, 1 +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: ldi.d $w0, 1 +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: lw $3, %got_page(.LCPI3_0)($1) +; N32-NEXT: addiu $3, $3, %got_ofst(.LCPI3_0) +; N32-NEXT: ld.d $w0, 0($3) +; N32-NEXT: st.d $w0, 0($2) +; N32-NEXT: lw $1, %got_page(.LCPI3_1)($1) +; N32-NEXT: addiu $1, $1, %got_ofst(.LCPI3_1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: jr $ra +; N32-NEXT: st.d $w0, 0($2) +; +; N64-LABEL: const_v2i64: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(const_v2i64))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(const_v2i64))) +; N64-NEXT: ldi.b $w0, 0 +; N64-NEXT: ld $2, %got_disp(v2i64)($1) +; N64-NEXT: st.d $w0, 0($2) +; N64-NEXT: ldi.b $w0, 1 +; N64-NEXT: st.d $w0, 0($2) +; N64-NEXT: ldi.h $w0, 1 +; N64-NEXT: 
st.d $w0, 0($2) +; N64-NEXT: ldi.w $w0, 1 +; N64-NEXT: st.d $w0, 0($2) +; N64-NEXT: ldi.d $w0, 1 +; N64-NEXT: st.d $w0, 0($2) +; N64-NEXT: ld $3, %got_page(.LCPI3_0)($1) +; N64-NEXT: daddiu $3, $3, %got_ofst(.LCPI3_0) +; N64-NEXT: ld.d $w0, 0($3) +; N64-NEXT: st.d $w0, 0($2) +; N64-NEXT: ld $1, %got_page(.LCPI3_1)($1) +; N64-NEXT: daddiu $1, $1, %got_ofst(.LCPI3_1) +; N64-NEXT: ld.d $w0, 0($1) +; N64-NEXT: jr $ra +; N64-NEXT: st.d $w0, 0($2) store volatile <2 x i64> , <2 x i64>*@v2i64 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 0 - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; ALL: ldi.b [[R1:\$w[0-9]+]], 1 - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; ALL: ldi.h [[R1:\$w[0-9]+]], 1 - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; ALL: ldi.w [[R1:\$w[0-9]+]], 1 - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; ALL: ldi.d [[R1:\$w[0-9]+]], 1 - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) - ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) - store volatile <2 x i64> , <2 x i64>*@v2i64 - ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($ - ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst(.L - ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]]) - ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]]) - ret void } define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d, i8 signext %e, i8 signext %f, i8 signext %g, i8 signext %h) nounwind { - ; ALL-LABEL: nonconst_v16i8: - +; O32-LABEL: nonconst_v16i8: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: insert.b $w0[0], $4 +; O32-NEXT: insert.b $w0[1], $5 +; O32-NEXT: insert.b $w0[2], $6 +; O32-NEXT: insert.b $w0[3], $7 +; O32-NEXT: lw $2, 16($sp) +; O32-NEXT: insert.b 
$w0[4], $2 +; O32-NEXT: lw $2, 20($sp) +; O32-NEXT: insert.b $w0[5], $2 +; O32-NEXT: lw $2, 28($sp) +; O32-NEXT: lw $3, 24($sp) +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: insert.b $w0[6], $3 +; O32-NEXT: insert.b $w0[7], $2 +; O32-NEXT: insert.b $w0[8], $2 +; O32-NEXT: insert.b $w0[9], $2 +; O32-NEXT: insert.b $w0[10], $2 +; O32-NEXT: insert.b $w0[11], $2 +; O32-NEXT: insert.b $w0[12], $2 +; O32-NEXT: insert.b $w0[13], $2 +; O32-NEXT: insert.b $w0[14], $2 +; O32-NEXT: insert.b $w0[15], $2 +; O32-NEXT: jr $ra +; O32-NEXT: st.b $w0, 0($1) +; +; N32-LABEL: nonconst_v16i8: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v16i8))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v16i8))) +; N32-NEXT: insert.b $w0[0], $4 +; N32-NEXT: insert.b $w0[1], $5 +; N32-NEXT: insert.b $w0[2], $6 +; N32-NEXT: insert.b $w0[3], $7 +; N32-NEXT: insert.b $w0[4], $8 +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: insert.b $w0[5], $9 +; N32-NEXT: insert.b $w0[6], $10 +; N32-NEXT: insert.b $w0[7], $11 +; N32-NEXT: insert.b $w0[8], $11 +; N32-NEXT: insert.b $w0[9], $11 +; N32-NEXT: insert.b $w0[10], $11 +; N32-NEXT: insert.b $w0[11], $11 +; N32-NEXT: insert.b $w0[12], $11 +; N32-NEXT: insert.b $w0[13], $11 +; N32-NEXT: insert.b $w0[14], $11 +; N32-NEXT: insert.b $w0[15], $11 +; N32-NEXT: jr $ra +; N32-NEXT: st.b $w0, 0($1) +; +; N64-LABEL: nonconst_v16i8: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v16i8))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v16i8))) +; N64-NEXT: insert.b $w0[0], $4 +; N64-NEXT: insert.b $w0[1], $5 +; N64-NEXT: insert.b $w0[2], $6 +; N64-NEXT: insert.b $w0[3], $7 +; N64-NEXT: insert.b $w0[4], $8 +; N64-NEXT: ld $1, %got_disp(v16i8)($1) +; N64-NEXT: insert.b $w0[5], $9 +; N64-NEXT: insert.b $w0[6], $10 +; N64-NEXT: insert.b $w0[7], $11 +; N64-NEXT: insert.b $w0[8], $11 +; N64-NEXT: insert.b $w0[9], $11 +; N64-NEXT: insert.b $w0[10], $11 +; 
N64-NEXT: insert.b $w0[11], $11 +; N64-NEXT: insert.b $w0[12], $11 +; N64-NEXT: insert.b $w0[13], $11 +; N64-NEXT: insert.b $w0[14], $11 +; N64-NEXT: insert.b $w0[15], $11 +; N64-NEXT: jr $ra +; N64-NEXT: st.b $w0, 0($1) %1 = insertelement <16 x i8> undef, i8 %a, i32 0 %2 = insertelement <16 x i8> %1, i8 %b, i32 1 %3 = insertelement <16 x i8> %2, i8 %c, i32 2 @@ -221,39 +764,65 @@ define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 sign %14 = insertelement <16 x i8> %13, i8 %h, i32 13 %15 = insertelement <16 x i8> %14, i8 %h, i32 14 %16 = insertelement <16 x i8> %15, i8 %h, i32 15 - ; ALL-DAG: insert.b [[R1:\$w[0-9]+]][0], $4 - ; ALL-DAG: insert.b [[R1]][1], $5 - ; ALL-DAG: insert.b [[R1]][2], $6 - ; ALL-DAG: insert.b [[R1]][3], $7 - ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp) - ; MIPS32-DAG: insert.b [[R1]][4], [[R2]] - ; MIPS64-DAG: insert.b [[R1]][4], $8 - ; MIPS32-DAG: lw [[R3:\$[0-9]+]], 20($sp) - ; MIPS32-DAG: insert.b [[R1]][5], [[R3]] - ; MIPS64-DAG: insert.b [[R1]][5], $9 - ; MIPS32-DAG: lw [[R4:\$[0-9]+]], 24($sp) - ; MIPS32-DAG: insert.b [[R1]][6], [[R4]] - ; MIPS64-DAG: insert.b [[R1]][6], $10 - ; MIPS32-DAG: lw [[R5:\$[0-9]+]], 28($sp) - ; MIPS32-DAG: insert.b [[R1]][7], [[R5]] - ; MIPS64-DAG: insert.b [[R1]][7], [[R5:\$11]] - ; ALL-DAG: insert.b [[R1]][8], [[R5]] - ; ALL-DAG: insert.b [[R1]][9], [[R5]] - ; ALL-DAG: insert.b [[R1]][10], [[R5]] - ; ALL-DAG: insert.b [[R1]][11], [[R5]] - ; ALL-DAG: insert.b [[R1]][12], [[R5]] - ; ALL-DAG: insert.b [[R1]][13], [[R5]] - ; ALL-DAG: insert.b [[R1]][14], [[R5]] - ; ALL-DAG: insert.b [[R1]][15], [[R5]] - store volatile <16 x i8> %16, <16 x i8>*@v16i8 - ret void } define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16 signext %d, i16 signext %e, i16 signext %f, i16 signext %g, i16 signext %h) nounwind { - ; ALL-LABEL: nonconst_v8i16: - +; O32-LABEL: nonconst_v8i16: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; 
O32-NEXT: addu $1, $2, $25 +; O32-NEXT: insert.h $w0[0], $4 +; O32-NEXT: insert.h $w0[1], $5 +; O32-NEXT: insert.h $w0[2], $6 +; O32-NEXT: insert.h $w0[3], $7 +; O32-NEXT: lw $2, 16($sp) +; O32-NEXT: insert.h $w0[4], $2 +; O32-NEXT: lw $2, 20($sp) +; O32-NEXT: insert.h $w0[5], $2 +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: lw $2, 28($sp) +; O32-NEXT: lw $3, 24($sp) +; O32-NEXT: insert.h $w0[6], $3 +; O32-NEXT: insert.h $w0[7], $2 +; O32-NEXT: jr $ra +; O32-NEXT: st.h $w0, 0($1) +; +; N32-LABEL: nonconst_v8i16: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v8i16))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v8i16))) +; N32-NEXT: insert.h $w0[0], $4 +; N32-NEXT: insert.h $w0[1], $5 +; N32-NEXT: insert.h $w0[2], $6 +; N32-NEXT: insert.h $w0[3], $7 +; N32-NEXT: insert.h $w0[4], $8 +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: insert.h $w0[5], $9 +; N32-NEXT: insert.h $w0[6], $10 +; N32-NEXT: insert.h $w0[7], $11 +; N32-NEXT: jr $ra +; N32-NEXT: st.h $w0, 0($1) +; +; N64-LABEL: nonconst_v8i16: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v8i16))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v8i16))) +; N64-NEXT: insert.h $w0[0], $4 +; N64-NEXT: insert.h $w0[1], $5 +; N64-NEXT: insert.h $w0[2], $6 +; N64-NEXT: insert.h $w0[3], $7 +; N64-NEXT: insert.h $w0[4], $8 +; N64-NEXT: ld $1, %got_disp(v8i16)($1) +; N64-NEXT: insert.h $w0[5], $9 +; N64-NEXT: insert.h $w0[6], $10 +; N64-NEXT: insert.h $w0[7], $11 +; N64-NEXT: jr $ra +; N64-NEXT: st.h $w0, 0($1) %1 = insertelement <8 x i16> undef, i16 %a, i32 0 %2 = insertelement <8 x i16> %1, i16 %b, i32 1 %3 = insertelement <8 x i16> %2, i16 %c, i32 2 @@ -262,650 +831,1356 @@ define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16 %6 = insertelement <8 x i16> %5, i16 %f, i32 5 %7 = insertelement <8 x i16> %6, i16 %g, i32 6 %8 = insertelement <8 x i16> %7, i16 %h, i32 7 - ; 
ALL-DAG: insert.h [[R1:\$w[0-9]+]][0], $4 - ; ALL-DAG: insert.h [[R1]][1], $5 - ; ALL-DAG: insert.h [[R1]][2], $6 - ; ALL-DAG: insert.h [[R1]][3], $7 - ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp) - ; MIPS32-DAG: insert.h [[R1]][4], [[R2]] - ; MIPS64-DAG: insert.h [[R1]][4], $8 - ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 20($sp) - ; MIPS32-DAG: insert.h [[R1]][5], [[R2]] - ; MIPS64-DAG: insert.h [[R1]][5], $9 - ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 24($sp) - ; MIPS32-DAG: insert.h [[R1]][6], [[R2]] - ; MIPS64-DAG: insert.h [[R1]][6], $10 - ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 28($sp) - ; MIPS32-DAG: insert.h [[R1]][7], [[R2]] - ; MIPS64-DAG: insert.h [[R1]][7], $11 - store volatile <8 x i16> %8, <8 x i16>*@v8i16 - ret void } define void @nonconst_v4i32(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) nounwind { - ; ALL-LABEL: nonconst_v4i32: - +; O32-LABEL: nonconst_v4i32: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: insert.w $w0[0], $4 +; O32-NEXT: insert.w $w0[1], $5 +; O32-NEXT: insert.w $w0[2], $6 +; O32-NEXT: insert.w $w0[3], $7 +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: nonconst_v4i32: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v4i32))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v4i32))) +; N32-NEXT: insert.w $w0[0], $4 +; N32-NEXT: insert.w $w0[1], $5 +; N32-NEXT: insert.w $w0[2], $6 +; N32-NEXT: insert.w $w0[3], $7 +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: jr $ra +; N32-NEXT: st.w $w0, 0($1) +; +; N64-LABEL: nonconst_v4i32: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v4i32))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v4i32))) +; N64-NEXT: insert.w $w0[0], $4 +; N64-NEXT: insert.w $w0[1], $5 +; N64-NEXT: insert.w $w0[2], $6 +; N64-NEXT: insert.w $w0[3], $7 +; N64-NEXT: ld $1, 
%got_disp(v4i32)($1) +; N64-NEXT: jr $ra +; N64-NEXT: st.w $w0, 0($1) %1 = insertelement <4 x i32> undef, i32 %a, i32 0 %2 = insertelement <4 x i32> %1, i32 %b, i32 1 %3 = insertelement <4 x i32> %2, i32 %c, i32 2 %4 = insertelement <4 x i32> %3, i32 %d, i32 3 - ; ALL: insert.w [[R1:\$w[0-9]+]][0], $4 - ; ALL: insert.w [[R1]][1], $5 - ; ALL: insert.w [[R1]][2], $6 - ; ALL: insert.w [[R1]][3], $7 - store volatile <4 x i32> %4, <4 x i32>*@v4i32 - ret void } define void @nonconst_v2i64(i64 signext %a, i64 signext %b) nounwind { - ; ALL-LABEL: nonconst_v2i64: - +; O32-LABEL: nonconst_v2i64: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: insert.w $w0[0], $4 +; O32-NEXT: insert.w $w0[1], $5 +; O32-NEXT: insert.w $w0[2], $6 +; O32-NEXT: insert.w $w0[3], $7 +; O32-NEXT: lw $1, %got(v2i64)($1) +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: nonconst_v2i64: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v2i64))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(nonconst_v2i64))) +; N32-NEXT: insert.d $w0[0], $4 +; N32-NEXT: insert.d $w0[1], $5 +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: jr $ra +; N32-NEXT: st.d $w0, 0($1) +; +; N64-LABEL: nonconst_v2i64: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(nonconst_v2i64))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(nonconst_v2i64))) +; N64-NEXT: insert.d $w0[0], $4 +; N64-NEXT: insert.d $w0[1], $5 +; N64-NEXT: ld $1, %got_disp(v2i64)($1) +; N64-NEXT: jr $ra +; N64-NEXT: st.d $w0, 0($1) %1 = insertelement <2 x i64> undef, i64 %a, i32 0 %2 = insertelement <2 x i64> %1, i64 %b, i32 1 - ; MIPS32: insert.w [[R1:\$w[0-9]+]][0], $4 - ; MIPS32: insert.w [[R1]][1], $5 - ; MIPS32: insert.w [[R1]][2], $6 - ; MIPS32: insert.w [[R1]][3], $7 - ; MIPS64: insert.d [[R1:\$w[0-9]+]][0], $4 - ; MIPS64: insert.d [[R1]][1], $5 - store volatile <2 x i64> 
%2, <2 x i64>*@v2i64 - ret void } define i32 @extract_sext_v16i8() nounwind { - ; ALL-LABEL: extract_sext_v16i8: - +; O32-LABEL: extract_sext_v16i8: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: addv.b $w0, $w0, $w0 +; O32-NEXT: copy_s.b $1, $w0[1] +; O32-NEXT: jr $ra +; O32-NEXT: seb $2, $1 +; +; N32-LABEL: extract_sext_v16i8: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8))) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: addv.b $w0, $w0, $w0 +; N32-NEXT: copy_s.b $1, $w0[1] +; N32-NEXT: jr $ra +; N32-NEXT: seb $2, $1 +; +; N64-LABEL: extract_sext_v16i8: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8))) +; N64-NEXT: ld $1, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($1) +; N64-NEXT: addv.b $w0, $w0, $w0 +; N64-NEXT: copy_s.b $1, $w0[1] +; N64-NEXT: jr $ra +; N64-NEXT: seb $2, $1 %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], - %2 = add <16 x i8> %1, %1 - ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <16 x i8> %2, i32 1 %4 = sext i8 %3 to i32 - ; ALL-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: sll - ; ALL-NOT: sra - ret i32 %4 } define i32 @extract_sext_v8i16() nounwind { - ; ALL-LABEL: extract_sext_v8i16: - +; O32-LABEL: extract_sext_v8i16: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: addv.h $w0, $w0, $w0 +; O32-NEXT: copy_s.h $1, $w0[1] +; O32-NEXT: jr $ra +; O32-NEXT: seh $2, $1 +; +; N32-LABEL: 
extract_sext_v8i16: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16))) +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: addv.h $w0, $w0, $w0 +; N32-NEXT: copy_s.h $1, $w0[1] +; N32-NEXT: jr $ra +; N32-NEXT: seh $2, $1 +; +; N64-LABEL: extract_sext_v8i16: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16))) +; N64-NEXT: ld $1, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($1) +; N64-NEXT: addv.h $w0, $w0, $w0 +; N64-NEXT: copy_s.h $1, $w0[1] +; N64-NEXT: jr $ra +; N64-NEXT: seh $2, $1 %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], - %2 = add <8 x i16> %1, %1 - ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <8 x i16> %2, i32 1 %4 = sext i16 %3 to i32 - ; ALL-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: sll - ; ALL-NOT: sra - ret i32 %4 } define i32 @extract_sext_v4i32() nounwind { - ; ALL-LABEL: extract_sext_v4i32: - +; O32-LABEL: extract_sext_v4i32: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: addv.w $w0, $w0, $w0 +; O32-NEXT: jr $ra +; O32-NEXT: copy_s.w $2, $w0[1] +; +; N32-LABEL: extract_sext_v4i32: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32))) +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: addv.w $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: copy_s.w $2, $w0[1] +; +; N64-LABEL: extract_sext_v4i32: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32))) +; N64-NEXT: daddu $1, $1, $25 +; 
N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32))) +; N64-NEXT: ld $1, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($1) +; N64-NEXT: addv.w $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_s.w $2, $w0[1] %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], - %2 = add <4 x i32> %1, %1 - ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <4 x i32> %2, i32 1 - ; ALL-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1] - ret i32 %3 } define i64 @extract_sext_v2i64() nounwind { - ; ALL-LABEL: extract_sext_v2i64: - +; O32-BE-LABEL: extract_sext_v2i64: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: lw $1, %got(v2i64)($1) +; O32-BE-NEXT: ld.d $w0, 0($1) +; O32-BE-NEXT: addv.d $w0, $w0, $w0 +; O32-BE-NEXT: shf.w $w0, $w0, 177 +; O32-BE-NEXT: copy_s.w $2, $w0[2] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: copy_s.w $3, $w0[3] +; +; O32-LE-LABEL: extract_sext_v2i64: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: lw $1, %got(v2i64)($1) +; O32-LE-NEXT: ld.d $w0, 0($1) +; O32-LE-NEXT: addv.d $w0, $w0, $w0 +; O32-LE-NEXT: copy_s.w $2, $w0[2] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: copy_s.w $3, $w0[3] +; +; N32-LABEL: extract_sext_v2i64: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64))) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: addv.d $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: copy_s.d $2, $w0[1] +; +; N64-LABEL: extract_sext_v2i64: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64))) +; N64-NEXT: ld $1, %got_disp(v2i64)($1) +; N64-NEXT: ld.d $w0, 
0($1) +; N64-NEXT: addv.d $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_s.d $2, $w0[1] %1 = load <2 x i64>, <2 x i64>* @v2i64 - ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], - %2 = add <2 x i64> %1, %1 - ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <2 x i64> %2, i32 1 - ; MIPS32-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2] - ; MIPS32-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3] - ; MIPS64-DAG: copy_s.d [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: sll - ; ALL-NOT: sra - ret i64 %3 } define i32 @extract_zext_v16i8() nounwind { - ; ALL-LABEL: extract_zext_v16i8: - +; O32-LABEL: extract_zext_v16i8: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: addv.b $w0, $w0, $w0 +; O32-NEXT: jr $ra +; O32-NEXT: copy_u.b $2, $w0[1] +; +; N32-LABEL: extract_zext_v16i8: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8))) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: addv.b $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: copy_u.b $2, $w0[1] +; +; N64-LABEL: extract_zext_v16i8: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8))) +; N64-NEXT: ld $1, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($1) +; N64-NEXT: addv.b $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_u.b $2, $w0[1] %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], - %2 = add <16 x i8> %1, %1 - ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <16 x i8> %2, i32 1 %4 = zext i8 %3 to i32 - ; ALL-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: andi - ret i32 %4 } define i32 @extract_zext_v8i16() nounwind { - ; ALL-LABEL: 
extract_zext_v8i16: - +; O32-LABEL: extract_zext_v8i16: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: addv.h $w0, $w0, $w0 +; O32-NEXT: jr $ra +; O32-NEXT: copy_u.h $2, $w0[1] +; +; N32-LABEL: extract_zext_v8i16: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16))) +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: addv.h $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: copy_u.h $2, $w0[1] +; +; N64-LABEL: extract_zext_v8i16: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16))) +; N64-NEXT: ld $1, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($1) +; N64-NEXT: addv.h $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_u.h $2, $w0[1] %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], - %2 = add <8 x i16> %1, %1 - ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <8 x i16> %2, i32 1 %4 = zext i16 %3 to i32 - ; ALL-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: andi - ret i32 %4 } define i32 @extract_zext_v4i32() nounwind { - ; ALL-LABEL: extract_zext_v4i32: - +; O32-LABEL: extract_zext_v4i32: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: addv.w $w0, $w0, $w0 +; O32-NEXT: jr $ra +; O32-NEXT: copy_s.w $2, $w0[1] +; +; N32-LABEL: extract_zext_v4i32: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32))) +; N32-NEXT: lw $1, 
%got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: addv.w $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: copy_s.w $2, $w0[1] +; +; N64-LABEL: extract_zext_v4i32: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32))) +; N64-NEXT: ld $1, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($1) +; N64-NEXT: addv.w $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_s.w $2, $w0[1] %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], - %2 = add <4 x i32> %1, %1 - ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <4 x i32> %2, i32 1 - ; ALL-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1] - ret i32 %3 } define i64 @extract_zext_v2i64() nounwind { - ; ALL-LABEL: extract_zext_v2i64: - +; O32-BE-LABEL: extract_zext_v2i64: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: lw $1, %got(v2i64)($1) +; O32-BE-NEXT: ld.d $w0, 0($1) +; O32-BE-NEXT: addv.d $w0, $w0, $w0 +; O32-BE-NEXT: shf.w $w0, $w0, 177 +; O32-BE-NEXT: copy_s.w $2, $w0[2] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: copy_s.w $3, $w0[3] +; +; O32-LE-LABEL: extract_zext_v2i64: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: lw $1, %got(v2i64)($1) +; O32-LE-NEXT: ld.d $w0, 0($1) +; O32-LE-NEXT: addv.d $w0, $w0, $w0 +; O32-LE-NEXT: copy_s.w $2, $w0[2] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: copy_s.w $3, $w0[3] +; +; N32-LABEL: extract_zext_v2i64: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64))) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: addv.d $w0, $w0, $w0 +; N32-NEXT: jr $ra +; N32-NEXT: 
copy_s.d $2, $w0[1] +; +; N64-LABEL: extract_zext_v2i64: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64))) +; N64-NEXT: ld $1, %got_disp(v2i64)($1) +; N64-NEXT: ld.d $w0, 0($1) +; N64-NEXT: addv.d $w0, $w0, $w0 +; N64-NEXT: jr $ra +; N64-NEXT: copy_s.d $2, $w0[1] %1 = load <2 x i64>, <2 x i64>* @v2i64 - ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], - %2 = add <2 x i64> %1, %1 - ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = extractelement <2 x i64> %2, i32 1 - ; MIPS32-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2] - ; MIPS32-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3] - ; MIPS64-DAG: copy_{{[su]}}.d [[R3:\$[0-9]+]], [[R1]][1] - ; ALL-NOT: andi - ret i64 %3 } define i32 @extract_sext_v16i8_vidx() nounwind { - ; ALL-LABEL: extract_sext_v16i8_vidx: - +; O32-LABEL: extract_sext_v16i8_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: addv.b $w0, $w0, $w0 +; O32-NEXT: splat.b $w0, $w0[$2] +; O32-NEXT: mfc1 $1, $f0 +; O32-NEXT: sra $1, $1, 24 +; O32-NEXT: jr $ra +; O32-NEXT: seb $2, $1 +; +; N32-LABEL: extract_sext_v16i8_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: addv.b $w0, $w0, $w0 +; N32-NEXT: splat.b $w0, $w0[$2] +; N32-NEXT: mfc1 $1, $f0 +; N32-NEXT: sra $1, $1, 24 +; N32-NEXT: jr $ra +; N32-NEXT: seb $2, $1 +; +; N64-LABEL: extract_sext_v16i8_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v16i8_vidx))) +; 
N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v16i8_vidx))) +; N64-NEXT: ld $2, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($2) +; N64-NEXT: addv.b $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.b $w0, $w0[$1] +; N64-NEXT: mfc1 $1, $f0 +; N64-NEXT: sra $1, $1, 24 +; N64-NEXT: jr $ra +; N64-NEXT: seb $2, $1 %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)( - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <16 x i8> %1, %1 - ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <16 x i8> %2, i32 %3 %5 = sext i8 %4 to i32 - ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24 - ret i32 %5 } define i32 @extract_sext_v8i16_vidx() nounwind { - ; ALL-LABEL: extract_sext_v8i16_vidx: - +; O32-LABEL: extract_sext_v8i16_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: addv.h $w0, $w0, $w0 +; O32-NEXT: splat.h $w0, $w0[$2] +; O32-NEXT: mfc1 $1, $f0 +; O32-NEXT: sra $1, $1, 16 +; O32-NEXT: jr $ra +; O32-NEXT: seh $2, $1 +; +; N32-LABEL: extract_sext_v8i16_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; 
N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: addv.h $w0, $w0, $w0 +; N32-NEXT: splat.h $w0, $w0[$2] +; N32-NEXT: mfc1 $1, $f0 +; N32-NEXT: sra $1, $1, 16 +; N32-NEXT: jr $ra +; N32-NEXT: seh $2, $1 +; +; N64-LABEL: extract_sext_v8i16_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v8i16_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v8i16_vidx))) +; N64-NEXT: ld $2, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($2) +; N64-NEXT: addv.h $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.h $w0, $w0[$1] +; N64-NEXT: mfc1 $1, $f0 +; N64-NEXT: sra $1, $1, 16 +; N64-NEXT: jr $ra +; N64-NEXT: seh $2, $1 %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)( - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <8 x i16> %1, %1 - ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <8 x i16> %2, i32 %3 %5 = sext i16 %4 to i32 - ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16 - ret i32 %5 } define i32 @extract_sext_v4i32_vidx() nounwind { - ; ALL-LABEL: extract_sext_v4i32_vidx: - +; O32-LABEL: extract_sext_v4i32_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: addv.w $w0, $w0, $w0 +; O32-NEXT: 
splat.w $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: mfc1 $2, $f0 +; +; N32-LABEL: extract_sext_v4i32_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: addv.w $w0, $w0, $w0 +; N32-NEXT: splat.w $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: mfc1 $2, $f0 +; +; N64-LABEL: extract_sext_v4i32_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v4i32_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v4i32_vidx))) +; N64-NEXT: ld $2, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($2) +; N64-NEXT: addv.w $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.w $w0, $w0[$1] +; N64-NEXT: jr $ra +; N64-NEXT: mfc1 $2, $f0 %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)( - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <4 x i32> %1, %1 - ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <4 x i32> %2, i32 %3 - ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-NOT: sra - ret i32 %4 } define i64 @extract_sext_v2i64_vidx() nounwind { - ; ALL-LABEL: extract_sext_v2i64_vidx: - +; O32-BE-LABEL: extract_sext_v2i64_vidx: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-BE-NEXT: 
addu $1, $2, $25 +; O32-BE-NEXT: lw $2, %got(i32)($1) +; O32-BE-NEXT: lw $2, 0($2) +; O32-BE-NEXT: addu $2, $2, $2 +; O32-BE-NEXT: addiu $3, $2, 1 +; O32-BE-NEXT: lw $1, %got(v2i64)($1) +; O32-BE-NEXT: ld.d $w0, 0($1) +; O32-BE-NEXT: addv.d $w0, $w0, $w0 +; O32-BE-NEXT: shf.w $w0, $w0, 177 +; O32-BE-NEXT: splat.w $w1, $w0[$3] +; O32-BE-NEXT: mfc1 $3, $f1 +; O32-BE-NEXT: splat.w $w0, $w0[$2] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: mfc1 $2, $f0 +; +; O32-LE-LABEL: extract_sext_v2i64_vidx: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: lw $2, %got(i32)($1) +; O32-LE-NEXT: lw $2, 0($2) +; O32-LE-NEXT: addu $2, $2, $2 +; O32-LE-NEXT: addiu $3, $2, 1 +; O32-LE-NEXT: lw $1, %got(v2i64)($1) +; O32-LE-NEXT: ld.d $w0, 0($1) +; O32-LE-NEXT: addv.d $w0, $w0, $w0 +; O32-LE-NEXT: splat.w $w1, $w0[$3] +; O32-LE-NEXT: mfc1 $3, $f1 +; O32-LE-NEXT: splat.w $w0, $w0[$2] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: mfc1 $2, $f0 +; +; N32-LABEL: extract_sext_v2i64_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: addv.d $w0, $w0, $w0 +; N32-NEXT: splat.d $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: dmfc1 $2, $f0 +; +; N64-LABEL: extract_sext_v2i64_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_sext_v2i64_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_sext_v2i64_vidx))) +; N64-NEXT: ld $2, %got_disp(v2i64)($1) +; N64-NEXT: ld.d $w0, 0($2) +; N64-NEXT: addv.d $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.d $w0, $w0[$1] +; N64-NEXT: jr $ra +; N64-NEXT: dmfc1 $2, $f0 %1 = load <2 x i64>, <2 x 
i64>* @v2i64 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)( - ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <2 x i64> %1, %1 - ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - ; O32-DAG: addiu [[IDY:\$[0-9]+]], [[IDX]], 1 - %4 = extractelement <2 x i64> %2, i32 %3 - ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDY]]] - ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]] - ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-NOT: sra - ret i64 %4 } define i32 @extract_zext_v16i8_vidx() nounwind { - ; ALL-LABEL: extract_zext_v16i8_vidx: - +; O32-LABEL: extract_zext_v16i8_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: addv.b $w0, $w0, $w0 +; O32-NEXT: splat.b $w0, $w0[$2] +; O32-NEXT: mfc1 $1, $f0 +; O32-NEXT: jr $ra +; O32-NEXT: srl $2, $1, 24 +; +; N32-LABEL: extract_zext_v16i8_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: addv.b $w0, $w0, $w0 +; N32-NEXT: splat.b $w0, $w0[$2] +; N32-NEXT: mfc1 $1, $f0 +; N32-NEXT: jr $ra +; N32-NEXT: srl $2, $1, 24 
+; +; N64-LABEL: extract_zext_v16i8_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v16i8_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v16i8_vidx))) +; N64-NEXT: ld $2, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($2) +; N64-NEXT: addv.b $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.b $w0, $w0[$1] +; N64-NEXT: mfc1 $1, $f0 +; N64-NEXT: jr $ra +; N64-NEXT: srl $2, $1, 24 %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)( - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <16 x i8> %1, %1 - ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <16 x i8> %2, i32 %3 %5 = zext i8 %4 to i32 - ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24 - ret i32 %5 } define i32 @extract_zext_v8i16_vidx() nounwind { - ; ALL-LABEL: extract_zext_v8i16_vidx: - +; O32-LABEL: extract_zext_v8i16_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: addv.h $w0, $w0, $w0 +; O32-NEXT: splat.h $w0, $w0[$2] +; O32-NEXT: mfc1 $1, $f0 +; O32-NEXT: jr $ra +; O32-NEXT: srl $2, $1, 16 +; +; N32-LABEL: extract_zext_v8i16_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, 
%lo(%neg(%gp_rel(extract_zext_v8i16_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: addv.h $w0, $w0, $w0 +; N32-NEXT: splat.h $w0, $w0[$2] +; N32-NEXT: mfc1 $1, $f0 +; N32-NEXT: jr $ra +; N32-NEXT: srl $2, $1, 16 +; +; N64-LABEL: extract_zext_v8i16_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v8i16_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v8i16_vidx))) +; N64-NEXT: ld $2, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($2) +; N64-NEXT: addv.h $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.h $w0, $w0[$1] +; N64-NEXT: mfc1 $1, $f0 +; N64-NEXT: jr $ra +; N64-NEXT: srl $2, $1, 16 %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)( - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <8 x i16> %1, %1 - ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <8 x i16> %2, i32 %3 %5 = zext i16 %4 to i32 - ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16 - ret i32 %5 } define i32 @extract_zext_v4i32_vidx() nounwind { - ; ALL-LABEL: extract_zext_v4i32_vidx: - +; O32-LABEL: extract_zext_v4i32_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: 
addv.w $w0, $w0, $w0 +; O32-NEXT: splat.w $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: mfc1 $2, $f0 +; +; N32-LABEL: extract_zext_v4i32_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: addv.w $w0, $w0, $w0 +; N32-NEXT: splat.w $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: mfc1 $2, $f0 +; +; N64-LABEL: extract_zext_v4i32_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v4i32_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v4i32_vidx))) +; N64-NEXT: ld $2, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($2) +; N64-NEXT: addv.w $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.w $w0, $w0[$1] +; N64-NEXT: jr $ra +; N64-NEXT: mfc1 $2, $f0 %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)( - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <4 x i32> %1, %1 - ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %4 = extractelement <4 x i32> %2, i32 %3 - ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-NOT: srl - ret i32 %4 } define i64 @extract_zext_v2i64_vidx() nounwind { - ; ALL-LABEL: extract_zext_v2i64_vidx: - +; O32-BE-LABEL: extract_zext_v2i64_vidx: +; O32-BE: # %bb.0: +; O32-BE-NEXT: lui $2, %hi(_gp_disp) +; O32-BE-NEXT: addiu $2, 
$2, %lo(_gp_disp) +; O32-BE-NEXT: addu $1, $2, $25 +; O32-BE-NEXT: lw $2, %got(i32)($1) +; O32-BE-NEXT: lw $2, 0($2) +; O32-BE-NEXT: addu $2, $2, $2 +; O32-BE-NEXT: addiu $3, $2, 1 +; O32-BE-NEXT: lw $1, %got(v2i64)($1) +; O32-BE-NEXT: ld.d $w0, 0($1) +; O32-BE-NEXT: addv.d $w0, $w0, $w0 +; O32-BE-NEXT: shf.w $w0, $w0, 177 +; O32-BE-NEXT: splat.w $w1, $w0[$3] +; O32-BE-NEXT: mfc1 $3, $f1 +; O32-BE-NEXT: splat.w $w0, $w0[$2] +; O32-BE-NEXT: jr $ra +; O32-BE-NEXT: mfc1 $2, $f0 +; +; O32-LE-LABEL: extract_zext_v2i64_vidx: +; O32-LE: # %bb.0: +; O32-LE-NEXT: lui $2, %hi(_gp_disp) +; O32-LE-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-LE-NEXT: addu $1, $2, $25 +; O32-LE-NEXT: lw $2, %got(i32)($1) +; O32-LE-NEXT: lw $2, 0($2) +; O32-LE-NEXT: addu $2, $2, $2 +; O32-LE-NEXT: addiu $3, $2, 1 +; O32-LE-NEXT: lw $1, %got(v2i64)($1) +; O32-LE-NEXT: ld.d $w0, 0($1) +; O32-LE-NEXT: addv.d $w0, $w0, $w0 +; O32-LE-NEXT: splat.w $w1, $w0[$3] +; O32-LE-NEXT: mfc1 $3, $f1 +; O32-LE-NEXT: splat.w $w0, $w0[$2] +; O32-LE-NEXT: jr $ra +; O32-LE-NEXT: mfc1 $2, $f0 +; +; N32-LABEL: extract_zext_v2i64_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: addv.d $w0, $w0, $w0 +; N32-NEXT: splat.d $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: dmfc1 $2, $f0 +; +; N64-LABEL: extract_zext_v2i64_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(extract_zext_v2i64_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(extract_zext_v2i64_vidx))) +; N64-NEXT: ld $2, %got_disp(v2i64)($1) +; N64-NEXT: ld.d $w0, 0($2) +; N64-NEXT: addv.d $w0, $w0, $w0 +; N64-NEXT: ld $1, %got_disp(i32)($1) +; N64-NEXT: lw $1, 0($1) +; N64-NEXT: splat.d $w0, $w0[$1] +; N64-NEXT: jr $ra +; N64-NEXT: dmfc1 
$2, $f0 %1 = load <2 x i64>, <2 x i64>* @v2i64 - ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)( - ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)( - ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)( - ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]]) - %2 = add <2 x i64> %1, %1 - ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] - %3 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - ; O32-DAG: addiu [[IDY:\$[0-9]+]], [[IDX]], 1 - %4 = extractelement <2 x i64> %2, i32 %3 - ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDY]]] - ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]] - ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]] - ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]] - ; ALL-NOT: srl - ret i64 %4 } define void @insert_v16i8(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v16i8: - +; O32-LABEL: insert_v16i8: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: insert.b $w0[1], $4 +; O32-NEXT: jr $ra +; O32-NEXT: st.b $w0, 0($1) +; +; N32-LABEL: insert_v16i8: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v16i8))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8))) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: insert.b $w0[1], $4 +; N32-NEXT: jr $ra +; N32-NEXT: st.b $w0, 0($1) +; +; N64-LABEL: insert_v16i8: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v16i8))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8))) +; N64-NEXT: ld $1, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($1) 
+; N64-NEXT: insert.b $w0[1], $4 +; N64-NEXT: jr $ra +; N64-NEXT: st.b $w0, 0($1) %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], - %a2 = trunc i32 %a to i8 %a3 = sext i8 %a2 to i32 %a4 = trunc i32 %a3 to i8 - ; ALL-NOT: andi - ; ALL-NOT: sra - %2 = insertelement <16 x i8> %1, i8 %a4, i32 1 - ; ALL-DAG: insert.b [[R1]][1], $4 - store <16 x i8> %2, <16 x i8>* @v16i8 - ; ALL-DAG: st.b [[R1]] - ret void } define void @insert_v8i16(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v8i16: - +; O32-LABEL: insert_v8i16: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: insert.h $w0[1], $4 +; O32-NEXT: jr $ra +; O32-NEXT: st.h $w0, 0($1) +; +; N32-LABEL: insert_v8i16: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v8i16))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16))) +; N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: insert.h $w0[1], $4 +; N32-NEXT: jr $ra +; N32-NEXT: st.h $w0, 0($1) +; +; N64-LABEL: insert_v8i16: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v8i16))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16))) +; N64-NEXT: ld $1, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($1) +; N64-NEXT: insert.h $w0[1], $4 +; N64-NEXT: jr $ra +; N64-NEXT: st.h $w0, 0($1) %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], - %a2 = trunc i32 %a to i16 %a3 = sext i16 %a2 to i32 %a4 = trunc i32 %a3 to i16 - ; ALL-NOT: andi - ; ALL-NOT: sra - %2 = insertelement <8 x i16> %1, i16 %a4, i32 1 - ; ALL-DAG: insert.h [[R1]][1], $4 - store <8 x i16> %2, <8 x i16>* @v8i16 - ; ALL-DAG: st.h [[R1]] - ret void } define void @insert_v4i32(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v4i32: - +; O32-LABEL: insert_v4i32: +; O32: # %bb.0: +; 
O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: insert.w $w0[1], $4 +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: insert_v4i32: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v4i32))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32))) +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: insert.w $w0[1], $4 +; N32-NEXT: jr $ra +; N32-NEXT: st.w $w0, 0($1) +; +; N64-LABEL: insert_v4i32: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v4i32))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32))) +; N64-NEXT: ld $1, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($1) +; N64-NEXT: insert.w $w0[1], $4 +; N64-NEXT: jr $ra +; N64-NEXT: st.w $w0, 0($1) %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], - - ; ALL-NOT: andi - ; ALL-NOT: sra - %2 = insertelement <4 x i32> %1, i32 %a, i32 1 - ; ALL-DAG: insert.w [[R1]][1], $4 - store <4 x i32> %2, <4 x i32>* @v4i32 - ; ALL-DAG: st.w [[R1]] - ret void } - define void @insert_v2i64(i64 signext %a) nounwind { - ; ALL-LABEL: insert_v2i64: - +; O32-LABEL: insert_v2i64: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v2i64)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: insert.w $w0[2], $4 +; O32-NEXT: insert.w $w0[3], $5 +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: insert_v2i64: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v2i64))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64))) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: insert.d $w0[1], $4 +; N32-NEXT: jr $ra +; N32-NEXT: st.d $w0, 0($1) +; +; N64-LABEL: 
insert_v2i64: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v2i64))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64))) +; N64-NEXT: ld $1, %got_disp(v2i64)($1) +; N64-NEXT: ld.d $w0, 0($1) +; N64-NEXT: insert.d $w0[1], $4 +; N64-NEXT: jr $ra +; N64-NEXT: st.d $w0, 0($1) %1 = load <2 x i64>, <2 x i64>* @v2i64 - ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], - ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]], - - ; ALL-NOT: andi - ; ALL-NOT: sra - %2 = insertelement <2 x i64> %1, i64 %a, i32 1 - ; MIPS32-DAG: insert.w [[R1]][2], $4 - ; MIPS32-DAG: insert.w [[R1]][3], $5 - ; MIPS64-DAG: insert.d [[R1]][1], $4 - store <2 x i64> %2, <2 x i64>* @v2i64 - ; MIPS32-DAG: st.w [[R1]] - ; MIPS64-DAG: st.d [[R1]] - ret void } define void @insert_v16i8_vidx(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v16i8_vidx: - +; O32-LABEL: insert_v16i8_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v16i8)($1) +; O32-NEXT: ld.b $w0, 0($1) +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: insert.b $w0[0], $4 +; O32-NEXT: neg $2, $2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: st.b $w0, 0($1) +; +; N32-LABEL: insert_v16i8_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v16i8_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v16i8)($1) +; N32-NEXT: ld.b $w0, 0($1) +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: insert.b $w0[0], $4 +; N32-NEXT: neg $2, $2 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: st.b $w0, 0($1) +; +; N64-LABEL: insert_v16i8_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v16i8_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, 
%lo(%neg(%gp_rel(insert_v16i8_vidx))) +; N64-NEXT: ld $2, %got_disp(i32)($1) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: ld $1, %got_disp(v16i8)($1) +; N64-NEXT: ld.b $w0, 0($1) +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: insert.b $w0[0], $4 +; N64-NEXT: dneg $2, $2 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: jr $ra +; N64-NEXT: st.b $w0, 0($1) %1 = load <16 x i8>, <16 x i8>* @v16i8 - ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], - %2 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %a2 = trunc i32 %a to i8 %a3 = sext i8 %a2 to i32 %a4 = trunc i32 %a3 to i8 - ; ALL-NOT: andi - ; ALL-NOT: sra - %3 = insertelement <16 x i8> %1, i8 %a4, i32 %2 - ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]] - ; ALL-DAG: insert.b [[R1]][0], $4 - ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]] - ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]] - ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[IDX]] - ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] - store <16 x i8> %3, <16 x i8>* @v16i8 - ; ALL-DAG: st.b [[R1]] - ret void } define void @insert_v8i16_vidx(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v8i16_vidx: - +; O32-LABEL: insert_v8i16_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, %got(v8i16)($1) +; O32-NEXT: ld.h $w0, 0($1) +; O32-NEXT: sll $2, $2, 1 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: insert.h $w0[0], $4 +; O32-NEXT: neg $2, $2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: st.h $w0, 0($1) +; +; N32-LABEL: insert_v8i16_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v8i16_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; 
N32-NEXT: lw $1, %got_disp(v8i16)($1) +; N32-NEXT: ld.h $w0, 0($1) +; N32-NEXT: sll $2, $2, 1 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: insert.h $w0[0], $4 +; N32-NEXT: neg $2, $2 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: st.h $w0, 0($1) +; +; N64-LABEL: insert_v8i16_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v8i16_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) +; N64-NEXT: ld $2, %got_disp(i32)($1) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: ld $1, %got_disp(v8i16)($1) +; N64-NEXT: ld.h $w0, 0($1) +; N64-NEXT: dsll $2, $2, 1 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: insert.h $w0[0], $4 +; N64-NEXT: dneg $2, $2 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: jr $ra +; N64-NEXT: st.h $w0, 0($1) %1 = load <8 x i16>, <8 x i16>* @v8i16 - ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], - %2 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - %a2 = trunc i32 %a to i16 %a3 = sext i16 %a2 to i32 %a4 = trunc i32 %a3 to i16 - ; ALL-NOT: andi - ; ALL-NOT: sra - %3 = insertelement <8 x i16> %1, i16 %a4, i32 %2 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1 - ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] - ; ALL-DAG: insert.h [[R1]][0], $4 - ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]] - ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] - store <8 x i16> %3, <8 x i16>* @v8i16 - ; ALL-DAG: st.h [[R1]] - ret void } define void @insert_v4i32_vidx(i32 signext %a) nounwind { - ; ALL-LABEL: insert_v4i32_vidx: - +; O32-LABEL: insert_v4i32_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: lw $1, 
%got(v4i32)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: sll $2, $2, 2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: insert.w $w0[0], $4 +; O32-NEXT: neg $2, $2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: insert_v4i32_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v4i32_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v4i32)($1) +; N32-NEXT: ld.w $w0, 0($1) +; N32-NEXT: sll $2, $2, 2 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: insert.w $w0[0], $4 +; N32-NEXT: neg $2, $2 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: st.w $w0, 0($1) +; +; N64-LABEL: insert_v4i32_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v4i32_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) +; N64-NEXT: ld $2, %got_disp(i32)($1) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: ld $1, %got_disp(v4i32)($1) +; N64-NEXT: ld.w $w0, 0($1) +; N64-NEXT: dsll $2, $2, 2 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: insert.w $w0[0], $4 +; N64-NEXT: dneg $2, $2 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: jr $ra +; N64-NEXT: st.w $w0, 0($1) %1 = load <4 x i32>, <4 x i32>* @v4i32 - ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], - %2 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - - ; ALL-NOT: andi - ; ALL-NOT: sra - %3 = insertelement <4 x i32> %1, i32 %a, i32 %2 - ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 - ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] - ; ALL-DAG: insert.w [[R1]][0], $4 - ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]] - ; ALL-DAG: sld.b 
[[R1]], [[R1]]{{\[}}[[NIDX]]] - store <4 x i32> %3, <4 x i32>* @v4i32 - ; ALL-DAG: st.w [[R1]] - ret void } +; TODO: This code could be a lot better but it works. The legalizer splits +; 64-bit inserts into two 32-bit inserts because there is no i64 type on +; MIPS32. The obvious optimisation is to perform both insert.w's at once while +; the vector is rotated. define void @insert_v2i64_vidx(i64 signext %a) nounwind { - ; ALL-LABEL: insert_v2i64_vidx: - +; O32-LABEL: insert_v2i64_vidx: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $2, %got(i32)($1) +; O32-NEXT: lw $2, 0($2) +; O32-NEXT: addu $2, $2, $2 +; O32-NEXT: lw $1, %got(v2i64)($1) +; O32-NEXT: ld.w $w0, 0($1) +; O32-NEXT: sll $3, $2, 2 +; O32-NEXT: sld.b $w0, $w0[$3] +; O32-NEXT: insert.w $w0[0], $4 +; O32-NEXT: neg $3, $3 +; O32-NEXT: sld.b $w0, $w0[$3] +; O32-NEXT: addiu $2, $2, 1 +; O32-NEXT: sll $2, $2, 2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: insert.w $w0[0], $5 +; O32-NEXT: neg $2, $2 +; O32-NEXT: sld.b $w0, $w0[$2] +; O32-NEXT: jr $ra +; O32-NEXT: st.w $w0, 0($1) +; +; N32-LABEL: insert_v2i64_vidx: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v2i64_vidx))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) +; N32-NEXT: lw $2, %got_disp(i32)($1) +; N32-NEXT: lw $2, 0($2) +; N32-NEXT: lw $1, %got_disp(v2i64)($1) +; N32-NEXT: ld.d $w0, 0($1) +; N32-NEXT: sll $2, $2, 3 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: insert.d $w0[0], $4 +; N32-NEXT: neg $2, $2 +; N32-NEXT: sld.b $w0, $w0[$2] +; N32-NEXT: jr $ra +; N32-NEXT: st.d $w0, 0($1) +; +; N64-LABEL: insert_v2i64_vidx: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(insert_v2i64_vidx))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) +; N64-NEXT: ld $2, %got_disp(i32)($1) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: ld $1, %got_disp(v2i64)($1) +; 
N64-NEXT: ld.d $w0, 0($1) +; N64-NEXT: dsll $2, $2, 3 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: insert.d $w0[0], $4 +; N64-NEXT: dneg $2, $2 +; N64-NEXT: sld.b $w0, $w0[$2] +; N64-NEXT: jr $ra +; N64-NEXT: st.d $w0, 0($1) %1 = load <2 x i64>, <2 x i64>* @v2i64 - ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], - ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]], - %2 = load i32, i32* @i32 - ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)( - ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)( - ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]]) - - ; ALL-NOT: andi - ; ALL-NOT: sra - %3 = insertelement <2 x i64> %1, i64 %a, i32 %2 - ; TODO: This code could be a lot better but it works. The legalizer splits - ; 64-bit inserts into two 32-bit inserts because there is no i64 type on - ; MIPS32. The obvious optimisation is to perform both insert.w's at once while - ; the vector is rotated. - ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2 - ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] - ; MIPS32-DAG: insert.w [[R1]][0], $4 - ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] - ; MIPS32-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1 - ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2 - ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] - ; MIPS32-DAG: insert.w [[R1]][0], $5 - ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] - - ; MIPS64-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3 - ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]] - ; MIPS64-DAG: insert.d [[R1]][0], $4 - ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]] - ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]] - ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]] - store <2 x i64> %3, <2 x i64>* @v2i64 - ; MIPS32-DAG: st.w [[R1]] - ; MIPS64-DAG: st.d [[R1]] - ret void } +; TODO: What code should be emitted? 
define void @truncstore() nounwind { - ; ALL-LABEL: truncstore: - +; O32-LABEL: truncstore: +; O32: # %bb.0: +; O32-NEXT: lui $2, %hi(_gp_disp) +; O32-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-NEXT: addu $1, $2, $25 +; O32-NEXT: lw $1, %got(v4i8)($1) +; O32-NEXT: addiu $2, $zero, 255 +; O32-NEXT: sb $2, 3($1) +; O32-NEXT: sb $2, 2($1) +; O32-NEXT: sb $2, 1($1) +; O32-NEXT: jr $ra +; O32-NEXT: sb $2, 0($1) +; +; N32-LABEL: truncstore: +; N32: # %bb.0: +; N32-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore))) +; N32-NEXT: addu $1, $1, $25 +; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(truncstore))) +; N32-NEXT: lw $1, %got_disp(v4i8)($1) +; N32-NEXT: addiu $2, $zero, 255 +; N32-NEXT: sb $2, 3($1) +; N32-NEXT: sb $2, 2($1) +; N32-NEXT: sb $2, 1($1) +; N32-NEXT: jr $ra +; N32-NEXT: sb $2, 0($1) +; +; N64-LABEL: truncstore: +; N64: # %bb.0: +; N64-NEXT: lui $1, %hi(%neg(%gp_rel(truncstore))) +; N64-NEXT: daddu $1, $1, $25 +; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(truncstore))) +; N64-NEXT: ld $1, %got_disp(v4i8)($1) +; N64-NEXT: addiu $2, $zero, 255 +; N64-NEXT: sb $2, 3($1) +; N64-NEXT: sb $2, 2($1) +; N64-NEXT: sb $2, 1($1) +; N64-NEXT: jr $ra +; N64-NEXT: sb $2, 0($1) store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8 - ; TODO: What code should be emitted? - ret void }