; Patch summary (free-form text placed ahead of the first file header).
;
; Two llc regression tests are touched:
;
;  * llvm/test/CodeGen/AArch64/bswap-known-bits.ll
;    - Removes the hand-written "CHECK-LABEL: @testN" / "CHECK: mov w0, #1"
;      pairs that sat above test1..test4 and adds, inside each function, the
;      assertions that the new NOTE line attributes to
;      utils/update_llc_test_checks.py (label, %bb.0, "mov w0, #1", "ret").
;    - Declares llvm.bswap.i64.
;    - Appends demand_one_byte0..3. Each one byte-swaps its argument and then
;      leaves a single byte of the swapped value observable:
;        byte0: trunc i32 -> i8          byte1: and with 0x0000ff00
;        byte2: or with 0xff00ffff       byte3: lshr i64 by 56
;      The recorded output still contains a "rev" in every case.
;    - Appends demand_one_loaded_byte: loads an i64 from %xp, shifts it right
;      by 32 and left by 24, truncates to i32 (only the top byte can be
;      non-zero), byte-swaps it, ORs it with the i32 at %yp masked by -256,
;      and stores the i32 back to %yp. The recorded output is
;      ldr / lsr #8 / rev / strb.
;
;  * llvm/test/CodeGen/X86/combine-bswap.ll
;    - Appends the same demand_one_loaded_byte IR with assertions under the
;      X86 and X64 prefixes; both end in "bswapl" followed by a "movb" store.
;
; NOTE(review): the new tests look like baseline coverage for a later combine
; that narrows a bswap when only some bytes are demanded - confirm against
; the commit message.
; NOTE(review): line breaks, diff-column prefixes, blank context lines and
; the two-space IR indent below were restored from the hunk line counts;
; confirm context-line whitespace against the target files before applying.
;
diff --git a/llvm/test/CodeGen/AArch64/bswap-known-bits.ll b/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
index 5a3e747859cd72..9048fc9aa72c49 100644
--- a/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
+++ b/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
@@ -1,11 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s
 
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
 
-; CHECK-LABEL: @test1
-; CHECK: mov w0, #1
 define i1 @test1(i16 %arg) {
+; CHECK-LABEL: test1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: ret
   %a = or i16 %arg, 511
   %b = call i16 @llvm.bswap.i16(i16 %a)
   %and = and i16 %b, 256
@@ -13,9 +17,11 @@ define i1 @test1(i16 %arg) {
   ret i1 %res
 }
 
-; CHECK-LABEL: @test2
-; CHECK: mov w0, #1
 define i1 @test2(i16 %arg) {
+; CHECK-LABEL: test2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: ret
   %a = or i16 %arg, 1
   %b = call i16 @llvm.bswap.i16(i16 %a)
   %and = and i16 %b, 256
@@ -23,9 +29,11 @@ define i1 @test2(i16 %arg) {
   ret i1 %res
 }
 
-; CHECK-LABEL: @test3
-; CHECK: mov w0, #1
 define i1 @test3(i16 %arg) {
+; CHECK-LABEL: test3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: ret
   %a = or i16 %arg, 256
   %b = call i16 @llvm.bswap.i16(i16 %a)
   %and = and i16 %b, 1
@@ -33,12 +41,77 @@ define i1 @test3(i16 %arg) {
   ret i1 %res
 }
 
-; CHECK-LABEL: @test4
-; CHECK: mov w0, #1
 define i1 @test4(i32 %arg) {
+; CHECK-LABEL: test4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: ret
   %a = or i32 %arg, 2147483647 ; i32_MAX
   %b = call i32 @llvm.bswap.i32(i32 %a)
   %and = and i32 %b, 127
   %res = icmp eq i32 %and, 127
   ret i1 %res
 }
+
+define i8 @demand_one_byte0(i32 %x) {
+; CHECK-LABEL: demand_one_byte0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: rev w0, w0
+; CHECK-NEXT: ret
+  %b = call i32 @llvm.bswap.i32(i32 %x)
+  %r = trunc i32 %b to i8
+  ret i8 %r
+}
+
+define i32 @demand_one_byte1(i32 %x) {
+; CHECK-LABEL: demand_one_byte1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: and w0, w8, #0xff00
+; CHECK-NEXT: ret
+  %b = call i32 @llvm.bswap.i32(i32 %x)
+  %r = and i32 %b, 65280 ; 0x0000ff00
+  ret i32 %r
+}
+
+define i32 @demand_one_byte2(i32 %x) {
+; CHECK-LABEL: demand_one_byte2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: orr w0, w8, #0xff00ffff
+; CHECK-NEXT: ret
+  %b = call i32 @llvm.bswap.i32(i32 %x)
+  %r = or i32 %b, 4278255615 ; 0xff00ffff
+  ret i32 %r
+}
+
+define i64 @demand_one_byte3(i64 %x) {
+; CHECK-LABEL: demand_one_byte3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: rev x8, x0
+; CHECK-NEXT: lsr x0, x8, #56
+; CHECK-NEXT: ret
+  %b = call i64 @llvm.bswap.i64(i64 %x)
+  %r = lshr i64 %b, 56
+  ret i64 %r
+}
+
+define void @demand_one_loaded_byte(i64* %xp, i32* %yp) {
+; CHECK-LABEL: demand_one_loaded_byte:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: lsr x8, x8, #8
+; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: strb w8, [x1]
+; CHECK-NEXT: ret
+  %x = load i64, i64* %xp, align 8
+  %x_zzzz7654 = lshr i64 %x, 32
+  %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
+  %x_4zzz = trunc i64 %x_z7654zzz to i32
+  %y = load i32, i32* %yp, align 4
+  %y_321z = and i32 %y, -256
+  %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
+  %r = or i32 %x_zzz4, %y_321z
+  store i32 %r, i32* %yp, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index c30360fd77d575..1d02e30fa2b416 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -56,3 +56,34 @@ define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
   %d = and i32 %c, 4294901760
   ret i32 %d
 }
+
+define void @demand_one_loaded_byte(i64* %xp, i32* %yp) {
+; X86-LABEL: demand_one_loaded_byte:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: shldl $24, %edx, %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: movb %cl, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: demand_one_loaded_byte:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: shrq $8, %rax
+; X64-NEXT: bswapl %eax
+; X64-NEXT: movb %al, (%rsi)
+; X64-NEXT: retq
+  %x = load i64, i64* %xp, align 8
+  %x_zzzz7654 = lshr i64 %x, 32
+  %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
+  %x_4zzz = trunc i64 %x_z7654zzz to i32
+  %y = load i32, i32* %yp, align 4
+  %y_321z = and i32 %y, -256
+  %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
+  %r = or i32 %x_zzz4, %y_321z
+  store i32 %r, i32* %yp, align 4
+  ret void
+}