| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ | ||
| ; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 | ||
| ; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ | ||
| ; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 | ||
|
|
||
| ; i32 variants of the logical-operation-with-complement patterns. In every | ||
| ; pair below the first instruction is expected to cost 1 on both CPUs, while | ||
| ; the second is expected to cost 1 on z13 and 0 on arch13 (presumably because | ||
| ; arch13 can fold the pair into one combined instruction). | ||
| define void @fun0(i32 %a) { | ||
| ; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun0': | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1 | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1 | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1 | ||
|
|
||
| entry: | ||
| ; OR with complement. | ||
| %l0 = load i32, i32* undef | ||
| %c0 = xor i32 %l0, -1 | ||
| %res0 = or i32 %a, %c0 | ||
| store i32 %res0, i32* undef | ||
|
|
||
| ; AND with complement. | ||
| %l1 = load i32, i32* undef | ||
| %c1 = xor i32 %l1, -1 | ||
| %res1 = and i32 %a, %c1 | ||
| store i32 %res1, i32* undef | ||
|
|
||
| ; NAND. | ||
| %l2 = load i32, i32* undef | ||
| %c2 = and i32 %l2, %a | ||
| %res2 = xor i32 %c2, -1 | ||
| store i32 %res2, i32* undef | ||
|
|
||
| ; NOR. | ||
| %l3 = load i32, i32* undef | ||
| %c3 = or i32 %l3, %a | ||
| %res3 = xor i32 %c3, -1 | ||
| store i32 %res3, i32* undef | ||
|
|
||
| ; NXOR. | ||
| %l4 = load i32, i32* undef | ||
| %c4 = xor i32 %l4, %a | ||
| %res4 = xor i32 %c4, -1 | ||
| store i32 %res4, i32* undef | ||
|
|
||
| ret void | ||
| } | ||
|
|
||
| ; i64 variants of the same five patterns as fun0, with the same expected | ||
| ; costs: 1 for the first instruction of each pair on both CPUs, and 1 on z13 | ||
| ; versus 0 on arch13 for the second. | ||
| define void @fun1(i64 %a) { | ||
| ; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1': | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1 | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1 | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1 | ||
| ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a | ||
| ; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1 | ||
| ; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1 | ||
| entry: | ||
| ; OR with complement. | ||
| %l0 = load i64, i64* undef | ||
| %c0 = xor i64 %l0, -1 | ||
| %res0 = or i64 %a, %c0 | ||
| store i64 %res0, i64* undef | ||
|
|
||
| ; AND with complement. | ||
| %l1 = load i64, i64* undef | ||
| %c1 = xor i64 %l1, -1 | ||
| %res1 = and i64 %a, %c1 | ||
| store i64 %res1, i64* undef | ||
|
|
||
| ; NAND. | ||
| %l2 = load i64, i64* undef | ||
| %c2 = and i64 %l2, %a | ||
| %res2 = xor i64 %c2, -1 | ||
| store i64 %res2, i64* undef | ||
|
|
||
| ; NOR. | ||
| %l3 = load i64, i64* undef | ||
| %c3 = or i64 %l3, %a | ||
| %res3 = xor i64 %c3, -1 | ||
| store i64 %res3, i64* undef | ||
|
|
||
| ; NXOR. | ||
| %l4 = load i64, i64* undef | ||
| %c4 = xor i64 %l4, %a | ||
| %res4 = xor i64 %c4, -1 | ||
| store i64 %res4, i64* undef | ||
|
|
||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| ; Test SELR and SELGR. | ||
| ; | ||
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s | ||
|
|
||
| ; Test SELR: a 32-bit select on an unsigned less-than compare against 42. | ||
| define i32 @f1(i32 %limit, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK: clfi %r2, 42 | ||
| ; CHECK: selrl %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %cond = icmp ult i32 %limit, 42 | ||
| %res = select i1 %cond, i32 %a, i32 %b | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; Test SELGR: a 64-bit select on an unsigned less-than compare against 42. | ||
| define i64 @f2(i64 %limit, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK: clgfi %r2, 42 | ||
| ; CHECK: selgrl %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %cond = icmp ult i64 %limit, 42 | ||
| %res = select i1 %cond, i64 %a, i64 %b | ||
| ret i64 %res | ||
| } | ||
|
|
||
| ; Test SELR in a case that could use COMPARE AND BRANCH. We prefer using | ||
| ; SELR if possible. The equality compare against 42 is expected to be a | ||
| ; plain CHI followed by SELRE. | ||
| define i32 @f3(i32 %limit, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK: chi %r2, 42 | ||
| ; CHECK: selre %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %cond = icmp eq i32 %limit, 42 | ||
| %res = select i1 %cond, i32 %a, i32 %b | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; ...and again for SELGR: the 64-bit equality compare is expected to be a | ||
| ; CGHI followed by SELGRE. | ||
| define i64 @f4(i64 %limit, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f4: | ||
| ; CHECK: cghi %r2, 42 | ||
| ; CHECK: selgre %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %cond = icmp eq i64 %limit, 42 | ||
| %res = select i1 %cond, i64 %a, i64 %b | ||
| ret i64 %res | ||
| } | ||
|
|
||
| ; Check that we also get SELR as a result of early if-conversion. | ||
| ; The branch-plus-phi diamond below is expected to become one compare and one | ||
| ; select. Note that the expected compare immediate is 41 (not 42) and the | ||
| ; expected select operands are %r4, %r3. | ||
| define i32 @f5(i32 %limit, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f5: | ||
| ; CHECK: clfi %r2, 41 | ||
| ; CHECK: selrh %r2, %r4, %r3 | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %cond = icmp ult i32 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i32 [ %a, %if.then ], [ %b, %entry ] | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; ... and likewise for SELGR: the 64-bit diamond is expected to become | ||
| ; CLGFI with immediate 41 followed by SELGRH on %r4, %r3. | ||
| define i64 @f6(i64 %limit, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f6: | ||
| ; CHECK: clgfi %r2, 41 | ||
| ; CHECK: selgrh %r2, %r4, %r3 | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %cond = icmp ult i64 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i64 [ %a, %if.then ], [ %b, %entry ] | ||
| ret i64 %res | ||
| } | ||
|
|
||
| ; Check that inverting the condition works as well. | ||
| ; The compare and branch are the same as in f5; only the phi operands are | ||
| ; swapped, so the select is expected to take %r3, %r4 instead of %r4, %r3. | ||
| define i32 @f7(i32 %limit, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f7: | ||
| ; CHECK: clfi %r2, 41 | ||
| ; CHECK: selrh %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %cond = icmp ult i32 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i32 [ %b, %if.then ], [ %a, %entry ] | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; ... and likewise for SELGR: same as f6 with the phi operands swapped, so | ||
| ; SELGRH is expected to take %r3, %r4. | ||
| define i64 @f8(i64 %limit, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f8: | ||
| ; CHECK: clgfi %r2, 41 | ||
| ; CHECK: selgrh %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %cond = icmp ult i64 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i64 [ %b, %if.then ], [ %a, %entry ] | ||
| ret i64 %res | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| ; Test SELFHR. | ||
| ; See comments in asm-18.ll about testing high-word operations. | ||
| ; | ||
| ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \ | ||
| ; RUN: -no-integrated-as | FileCheck %s | ||
|
|
||
| ; Test SELFHR for a plain select. Both inputs come from inline asm with an | ||
| ; "=h" constraint and the result is consumed through an "h" constraint | ||
| ; (presumably forcing high-word registers - see asm-18.ll). The two trailing | ||
| ; "use" asm statements read %a and %b again after the select. | ||
| define void @f1(i32 %limit) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK-DAG: stepa [[REG1:%r[0-5]]] | ||
| ; CHECK-DAG: stepb [[REG2:%r[0-5]]] | ||
| ; CHECK-DAG: clfi %r2, 42 | ||
| ; CHECK: selfhrl [[REG3:%r[0-5]]], [[REG1]], [[REG2]] | ||
| ; CHECK: stepc [[REG3]] | ||
| ; CHECK: br %r14 | ||
| %a = call i32 asm sideeffect "stepa $0", "=h"() | ||
| %b = call i32 asm sideeffect "stepb $0", "=h"() | ||
| %cond = icmp ult i32 %limit, 42 | ||
| %res = select i1 %cond, i32 %a, i32 %b | ||
| call void asm sideeffect "stepc $0", "h"(i32 %res) | ||
| call void asm sideeffect "use $0", "h"(i32 %a) | ||
| call void asm sideeffect "use $0", "h"(i32 %b) | ||
| ret void | ||
| } | ||
|
|
||
| ; Check that we also get SELFHR as a result of early if-conversion. | ||
| ; The branch-plus-phi diamond is expected to become a compare against 41 | ||
| ; followed by SELFHRH with the stepb register first and the stepa register | ||
| ; second. | ||
| define void @f2(i32 %limit) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK-DAG: stepa [[REG1:%r[0-5]]] | ||
| ; CHECK-DAG: stepb [[REG2:%r[0-5]]] | ||
| ; CHECK-DAG: clfi %r2, 41 | ||
| ; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG2]], [[REG1]] | ||
| ; CHECK: stepc [[REG3]] | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %a = call i32 asm sideeffect "stepa $0", "=h"() | ||
| %b = call i32 asm sideeffect "stepb $0", "=h"() | ||
| %cond = icmp ult i32 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i32 [ %a, %if.then ], [ %b, %entry ] | ||
| call void asm sideeffect "stepc $0", "h"(i32 %res) | ||
| call void asm sideeffect "use $0", "h"(i32 %a) | ||
| call void asm sideeffect "use $0", "h"(i32 %b) | ||
| ret void | ||
| } | ||
|
|
||
| ; Check that inverting the condition works as well. | ||
| ; Same as f2 except that the phi operands are swapped, so SELFHRH is expected | ||
| ; to take the stepa register first and the stepb register second. | ||
| define void @f3(i32 %limit) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK-DAG: stepa [[REG1:%r[0-5]]] | ||
| ; CHECK-DAG: stepb [[REG2:%r[0-5]]] | ||
| ; CHECK-DAG: clfi %r2, 41 | ||
| ; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG1]], [[REG2]] | ||
| ; CHECK: stepc [[REG3]] | ||
| ; CHECK: br %r14 | ||
| entry: | ||
| %a = call i32 asm sideeffect "stepa $0", "=h"() | ||
| %b = call i32 asm sideeffect "stepb $0", "=h"() | ||
| %cond = icmp ult i32 %limit, 42 | ||
| br i1 %cond, label %if.then, label %return | ||
|
|
||
| if.then: | ||
| br label %return | ||
|
|
||
| return: | ||
| %res = phi i32 [ %b, %if.then ], [ %a, %entry ] | ||
| call void asm sideeffect "stepc $0", "h"(i32 %res) | ||
| call void asm sideeffect "use $0", "h"(i32 %a) | ||
| call void asm sideeffect "use $0", "h"(i32 %b) | ||
| ret void | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,179 @@ | ||
| # RUN: llc -mtriple=s390x-linux-gnu -mcpu=arch13 -start-before=greedy %s -o - \ | ||
| # RUN: | FileCheck %s | ||
| # | ||
| # Test that regalloc manages (via regalloc hints) to avoid a LOCRMux jump | ||
| # sequence expansion, and that a SELR instruction is emitted. | ||
|
|
||
| --- | | ||
| ; ModuleID = 'tc.ll' | ||
| source_filename = "tc.ll" | ||
| target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" | ||
|
|
||
| @globvar = external global i32 | ||
|
|
||
| declare void @fun() #0 | ||
|
|
||
| ; IR for fun1. The machine function of the same name refers back to values | ||
| ; defined here from its memory operands (e.g. %ir.lsr.iv12 and @globvar). | ||
| define void @fun1() #0 { | ||
| bb5: | ||
| br label %bb6 | ||
|
|
||
| bb6: ; preds = %bb33, %bb5 | ||
| %tmp = phi i1 [ %tmp34, %bb33 ], [ undef, %bb5 ] | ||
| br label %bb7 | ||
|
|
||
| bb7: ; preds = %bb7, %bb6 | ||
| %lsr.iv1 = phi [512 x i32]* [ %0, %bb7 ], [ undef, %bb6 ] | ||
| %tmp8 = phi i32 [ %tmp27, %bb7 ], [ -1000000, %bb6 ] | ||
| %tmp9 = phi i64 [ %tmp28, %bb7 ], [ 0, %bb6 ] | ||
| %lsr3 = trunc i64 %tmp9 to i32 | ||
| %lsr.iv12 = bitcast [512 x i32]* %lsr.iv1 to i32* | ||
| %tmp11 = load i32, i32* %lsr.iv12 | ||
| %tmp12 = icmp sgt i32 %tmp11, undef | ||
| %tmp13 = trunc i64 %tmp9 to i32 | ||
| %tmp14 = select i1 %tmp12, i32 %lsr3, i32 0 | ||
| %tmp15 = select i1 %tmp12, i32 %tmp13, i32 %tmp8 | ||
| %tmp16 = load i32, i32* undef | ||
| %tmp17 = select i1 false, i32 undef, i32 %tmp14 | ||
| %tmp18 = select i1 false, i32 undef, i32 %tmp15 | ||
| %tmp19 = select i1 false, i32 %tmp16, i32 undef | ||
| %tmp20 = select i1 undef, i32 undef, i32 %tmp17 | ||
| %tmp21 = select i1 undef, i32 undef, i32 %tmp18 | ||
| %tmp22 = select i1 undef, i32 undef, i32 %tmp19 | ||
| %tmp23 = or i64 %tmp9, 3 | ||
| %tmp24 = icmp sgt i32 undef, %tmp22 | ||
| %tmp25 = trunc i64 %tmp23 to i32 | ||
| %tmp26 = select i1 %tmp24, i32 %tmp25, i32 %tmp20 | ||
| %tmp27 = select i1 %tmp24, i32 %tmp25, i32 %tmp21 | ||
| %tmp28 = add nuw nsw i64 %tmp9, 4 | ||
| %tmp29 = icmp eq i64 undef, 0 | ||
| %scevgep = getelementptr [512 x i32], [512 x i32]* %lsr.iv1, i64 0, i64 4 | ||
| %0 = bitcast i32* %scevgep to [512 x i32]* | ||
| br i1 %tmp29, label %bb30, label %bb7 | ||
|
|
||
| bb30: ; preds = %bb7 | ||
| %tmp32 = icmp sgt i32 %tmp27, -1000000 | ||
| br i1 %tmp32, label %bb33, label %bb35 | ||
|
|
||
| bb33: ; preds = %bb30 | ||
| call void @fun() | ||
| store i32 %tmp26, i32* @globvar | ||
| %tmp34 = icmp ugt i32 undef, 1 | ||
| br label %bb6 | ||
|
|
||
| bb35: ; preds = %bb30 | ||
| br i1 %tmp, label %bb37, label %bb38 | ||
|
|
||
| bb37: ; preds = %bb35 | ||
| unreachable | ||
|
|
||
| bb38: ; preds = %bb35 | ||
| unreachable | ||
| } | ||
|
|
||
| ; Function Attrs: nounwind | ||
| declare void @llvm.stackprotector(i8*, i8**) #1 | ||
|
|
||
| attributes #0 = { "target-cpu"="arch13" } | ||
| attributes #1 = { nounwind } | ||
|
|
||
| ... | ||
|
|
||
| # CHECK: selr | ||
| # CHECK-NOT: risblg | ||
|
|
||
| --- | ||
| # Machine function for fun1. Per the RUN line, compilation starts right | ||
| # before the greedy register allocator. The loop block bb.2 contains three | ||
| # SELRMux pseudos, which are the instructions this test is concerned with. | ||
| name: fun1 | ||
| alignment: 4 | ||
| tracksRegLiveness: true | ||
| registers: | ||
| - { id: 0, class: grx32bit } | ||
| - { id: 1, class: addr64bit } | ||
| - { id: 2, class: grx32bit } | ||
| - { id: 3, class: addr64bit } | ||
| - { id: 4, class: gr32bit } | ||
| - { id: 5, class: grx32bit } | ||
| - { id: 6, class: gr64bit } | ||
| - { id: 7, class: gr64bit } | ||
| - { id: 8, class: grx32bit } | ||
| - { id: 9, class: grx32bit } | ||
| - { id: 10, class: gr64bit } | ||
| - { id: 11, class: grx32bit } | ||
| - { id: 12, class: gr64bit } | ||
| - { id: 13, class: grx32bit } | ||
| - { id: 14, class: gr32bit } | ||
| - { id: 15, class: gr32bit } | ||
| - { id: 16, class: grx32bit } | ||
| - { id: 17, class: grx32bit } | ||
| - { id: 18, class: gr32bit } | ||
| - { id: 19, class: addr64bit } | ||
| - { id: 20, class: grx32bit } | ||
| - { id: 21, class: gr32bit } | ||
| - { id: 22, class: gr64bit } | ||
| - { id: 23, class: grx32bit } | ||
| - { id: 24, class: grx32bit } | ||
| - { id: 25, class: grx32bit } | ||
| - { id: 26, class: addr64bit } | ||
| - { id: 27, class: grx32bit } | ||
| - { id: 28, class: addr64bit } | ||
| frameInfo: | ||
| hasCalls: true | ||
| body: | | ||
| bb.0.bb5: | ||
| %25:grx32bit = IMPLICIT_DEF | ||
| bb.1.bb6: | ||
| %28:addr64bit = LGHI 0 | ||
| %27:grx32bit = IIFMux 4293967296 | ||
| %26:addr64bit = IMPLICIT_DEF | ||
| bb.2.bb7: | ||
| successors: %bb.3(0x04000000), %bb.2(0x7c000000) | ||
| %14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12) | ||
| CR %14, undef %15:gr32bit, implicit-def $cc | ||
| %16:grx32bit = COPY %28.subreg_l32 | ||
| %16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc | ||
| %17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc | ||
| %18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`) | ||
| %20:grx32bit = COPY %28.subreg_l32 | ||
| %20:grx32bit = OILMux %20, 3, implicit-def dead $cc | ||
| CR undef %21:gr32bit, %18, implicit-def $cc | ||
| %4:gr32bit = SELRMux %16, %20, 14, 2, implicit $cc | ||
| %27:grx32bit = SELRMux %17, %20, 14, 2, implicit killed $cc | ||
| %28:addr64bit = nuw nsw LA %28, 4, $noreg | ||
| %26:addr64bit = LA %26, 16, $noreg | ||
| CGHI undef %22:gr64bit, 0, implicit-def $cc | ||
| BRC 14, 6, %bb.2, implicit killed $cc | ||
| J %bb.3 | ||
| bb.3.bb30: | ||
| successors: %bb.4(0x7fffffff), %bb.5(0x00000001) | ||
| CFIMux %27, -999999, implicit-def $cc | ||
| BRC 14, 4, %bb.5, implicit killed $cc | ||
| J %bb.4 | ||
| bb.4.bb33: | ||
| ADJCALLSTACKDOWN 0, 0 | ||
| CallBRASL @fun, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc | ||
| ADJCALLSTACKUP 0, 0 | ||
| STRL %4, @globvar :: (store 4 into @globvar) | ||
| CLFIMux undef %23:grx32bit, 1, implicit-def $cc | ||
| %25:grx32bit = LHIMux 0 | ||
| %25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc | ||
| J %bb.1 | ||
| bb.5.bb35: | ||
| successors: %bb.6, %bb.7 | ||
| TMLMux %25, 1, implicit-def $cc | ||
| BRC 15, 8, %bb.7, implicit killed $cc | ||
| J %bb.6 | ||
| bb.6.bb37: | ||
| successors: | ||
| bb.7.bb38: | ||
| ... |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| ; Test population-count instruction on arch13 | ||
| ; | ||
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s | ||
|
|
||
| declare i32 @llvm.ctpop.i32(i32 %a) | ||
| declare i64 @llvm.ctpop.i64(i64 %a) | ||
|
|
||
| ; i32 popcount: the operand is expected to be zero-extended with llgfr before | ||
| ; a single POPCNT whose third operand is 8. | ||
| define i32 @f1(i32 %a) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK: llgfr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
|
|
||
| %popcnt = call i32 @llvm.ctpop.i32(i32 %a) | ||
| ret i32 %popcnt | ||
| } | ||
|
|
||
| ; i32 popcount of a value masked to its low 16 bits: the mask is expected to | ||
| ; be done with llghr. | ||
| define i32 @f2(i32 %a) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK: llghr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
| %and = and i32 %a, 65535 | ||
| %popcnt = call i32 @llvm.ctpop.i32(i32 %and) | ||
| ret i32 %popcnt | ||
| } | ||
|
|
||
| ; i32 popcount of a value masked to its low 8 bits: the mask is expected to | ||
| ; be done with llgcr. | ||
| define i32 @f3(i32 %a) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK: llgcr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
| %and = and i32 %a, 255 | ||
| %popcnt = call i32 @llvm.ctpop.i32(i32 %and) | ||
| ret i32 %popcnt | ||
| } | ||
|
|
||
| ; i64 popcount: POPCNT is expected to operate directly on %r2, with no | ||
| ; extension instruction listed beforehand. | ||
| define i64 @f4(i64 %a) { | ||
| ; CHECK-LABEL: f4: | ||
| ; CHECK: popcnt %r2, %r2, 8 | ||
| ; CHECK: br %r14 | ||
| %popcnt = call i64 @llvm.ctpop.i64(i64 %a) | ||
| ret i64 %popcnt | ||
| } | ||
|
|
||
| ; i64 popcount of a value masked to its low 32 bits: the mask is expected to | ||
| ; be done with llgfr before POPCNT. | ||
| define i64 @f5(i64 %a) { | ||
| ; CHECK-LABEL: f5: | ||
| ; CHECK: llgfr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
| %and = and i64 %a, 4294967295 | ||
| %popcnt = call i64 @llvm.ctpop.i64(i64 %and) | ||
| ret i64 %popcnt | ||
| } | ||
|
|
||
| ; i64 popcount of a value masked to its low 16 bits: the mask is expected to | ||
| ; be done with llghr. | ||
| define i64 @f6(i64 %a) { | ||
| ; CHECK-LABEL: f6: | ||
| ; CHECK: llghr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
| %and = and i64 %a, 65535 | ||
| %popcnt = call i64 @llvm.ctpop.i64(i64 %and) | ||
| ret i64 %popcnt | ||
| } | ||
|
|
||
| ; i64 popcount of a value masked to its low 8 bits: the mask is expected to | ||
| ; be done with llgcr. | ||
| define i64 @f7(i64 %a) { | ||
| ; CHECK-LABEL: f7: | ||
| ; CHECK: llgcr %r0, %r2 | ||
| ; CHECK: popcnt %r2, %r0, 8 | ||
| ; CHECK: br %r14 | ||
| %and = and i64 %a, 255 | ||
| %popcnt = call i64 @llvm.ctpop.i64(i64 %and) | ||
| ret i64 %popcnt | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| ; Combined logical operations involving complement on arch13 | ||
| ; | ||
| ; RUN: llc -mcpu=arch13 < %s -mtriple=s390x-linux-gnu | FileCheck %s | ||
|
|
||
| ; And-with-complement 32-bit: %a & ~%b is expected to select NCRK. | ||
| define i32 @f1(i32 %dummy, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK: ncrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i32 %b, -1 | ||
| %ret = and i32 %neg, %a | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; And-with-complement 64-bit: %a & ~%b is expected to select NCGRK. | ||
| define i64 @f2(i64 %dummy, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK: ncgrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i64 %b, -1 | ||
| %ret = and i64 %neg, %a | ||
| ret i64 %ret | ||
| } | ||
|
|
||
| ; Or-with-complement 32-bit: %a | ~%b is expected to select OCRK. | ||
| define i32 @f3(i32 %dummy, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK: ocrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i32 %b, -1 | ||
| %ret = or i32 %neg, %a | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; Or-with-complement 64-bit: %a | ~%b is expected to select OCGRK. | ||
| define i64 @f4(i64 %dummy, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f4: | ||
| ; CHECK: ocgrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i64 %b, -1 | ||
| %ret = or i64 %neg, %a | ||
| ret i64 %ret | ||
| } | ||
|
|
||
| ; NAND 32-bit: ~(%a & %b) is expected to select NNRK. | ||
| define i32 @f5(i32 %dummy, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f5: | ||
| ; CHECK: nnrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = and i32 %a, %b | ||
| %ret = xor i32 %tmp, -1 | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; NAND 64-bit: ~(%a & %b) is expected to select NNGRK. | ||
| define i64 @f6(i64 %dummy, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f6: | ||
| ; CHECK: nngrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = and i64 %a, %b | ||
| %ret = xor i64 %tmp, -1 | ||
| ret i64 %ret | ||
| } | ||
|
|
||
| ; NOR 32-bit: ~(%a | %b) is expected to select NORK. | ||
| define i32 @f7(i32 %dummy, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f7: | ||
| ; CHECK: nork %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = or i32 %a, %b | ||
| %ret = xor i32 %tmp, -1 | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; NOR 64-bit: ~(%a | %b) is expected to select NOGRK. | ||
| define i64 @f8(i64 %dummy, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f8: | ||
| ; CHECK: nogrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = or i64 %a, %b | ||
| %ret = xor i64 %tmp, -1 | ||
| ret i64 %ret | ||
| } | ||
|
|
||
| ; NXOR 32-bit: ~(%a ^ %b) is expected to select NXRK. | ||
| define i32 @f9(i32 %dummy, i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: f9: | ||
| ; CHECK: nxrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = xor i32 %a, %b | ||
| %ret = xor i32 %tmp, -1 | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; NXOR 64-bit: ~(%a ^ %b) is expected to select NXGRK. | ||
| define i64 @f10(i64 %dummy, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: f10: | ||
| ; CHECK: nxgrk %r2, %r3, %r4 | ||
| ; CHECK: br %r14 | ||
| %tmp = xor i64 %a, %b | ||
| %ret = xor i64 %tmp, -1 | ||
| ret i64 %ret | ||
| } | ||
|
|
||
| ; Or-with-complement 32-bit of a constant: -256 | ~%a. The constant is | ||
| ; expected to be loaded into a scratch register with lhi and then used as the | ||
| ; non-complemented operand of OCRK. | ||
| define i32 @f11(i32 %a) { | ||
| ; CHECK-LABEL: f11: | ||
| ; CHECK: lhi [[REG:%r[0-5]]], -256 | ||
| ; CHECK: ocrk %r2, [[REG]], %r2 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i32 %a, -1 | ||
| %ret = or i32 %neg, -256 | ||
| ret i32 %ret | ||
| } | ||
|
|
||
| ; Or-with-complement 64-bit of a constant: -256 | ~%a. The constant is | ||
| ; expected to be loaded into a scratch register with lghi and then used as | ||
| ; the non-complemented operand of OCGRK. | ||
| define i64 @f12(i64 %a) { | ||
| ; CHECK-LABEL: f12: | ||
| ; CHECK: lghi [[REG:%r[0-5]]], -256 | ||
| ; CHECK: ocgrk %r2, [[REG]], %r2 | ||
| ; CHECK: br %r14 | ||
| %neg = xor i64 %a, -1 | ||
| %ret = or i64 %neg, -256 | ||
| ret i64 %ret | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| ; Test loads of byte-swapped vector elements. | ||
| ; | ||
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s | ||
|
|
||
| declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) | ||
| declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) | ||
| declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) | ||
|
|
||
| ; Test v8i16 loads: a load followed by bswap is expected to become VLBRH. | ||
| define <8 x i16> @f1(<8 x i16> *%ptr) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK: vlbrh %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %load = load <8 x i16>, <8 x i16> *%ptr | ||
| %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %load) | ||
| ret <8 x i16> %ret | ||
| } | ||
|
|
||
| ; Test v4i32 loads: a load followed by bswap is expected to become VLBRF. | ||
| define <4 x i32> @f2(<4 x i32> *%ptr) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK: vlbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
||
| ; Test v2i64 loads: a load followed by bswap is expected to become VLBRG. | ||
| define <2 x i64> @f3(<2 x i64> *%ptr) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK: vlbrg %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %load = load <2 x i64>, <2 x i64> *%ptr | ||
| %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %load) | ||
| ret <2 x i64> %ret | ||
| } | ||
|
|
||
| ; Test the highest aligned in-range offset: element 255 of a <4 x i32> array | ||
| ; is expected to be addressed as displacement 4080. | ||
| define <4 x i32> @f4(<4 x i32> *%base) { | ||
| ; CHECK-LABEL: f4: | ||
| ; CHECK: vlbrf %v24, 4080(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255 | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
||
| ; Test the highest unaligned in-range offset: byte offset 4095 is expected to | ||
| ; be used directly as the displacement. | ||
| ; NOTE(review): the load below carries no explicit alignment although the | ||
| ; access is described as unaligned - confirm whether `align 1` is wanted. | ||
| define <4 x i32> @f5(i8 *%base) { | ||
| ; CHECK-LABEL: f5: | ||
| ; CHECK: vlbrf %v24, 4095(%r2) | ||
| ; CHECK: br %r14 | ||
| %addr = getelementptr i8, i8 *%base, i64 4095 | ||
| %ptr = bitcast i8 *%addr to <4 x i32> * | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
||
| ; Test the next offset up, which requires separate address logic: the | ||
| ; offset 4096 is expected to be added to the base with AGHI first. | ||
| define <4 x i32> @f6(<4 x i32> *%base) { | ||
| ; CHECK-LABEL: f6: | ||
| ; CHECK: aghi %r2, 4096 | ||
| ; CHECK: vlbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256 | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
||
| ; Test negative offsets, which also require separate address logic: the | ||
| ; offset -16 is expected to be added to the base with AGHI first. | ||
| define <4 x i32> @f7(<4 x i32> *%base) { | ||
| ; CHECK-LABEL: f7: | ||
| ; CHECK: aghi %r2, -16 | ||
| ; CHECK: vlbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1 | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
||
| ; Check that indexes are allowed: a variable byte index is expected to be | ||
| ; folded into the address as an index register. | ||
| ; NOTE(review): the load below carries no explicit alignment although the | ||
| ; byte index is arbitrary - confirm whether `align 1` is wanted. | ||
| define <4 x i32> @f8(i8 *%base, i64 %index) { | ||
| ; CHECK-LABEL: f8: | ||
| ; CHECK: vlbrf %v24, 0(%r3,%r2) | ||
| ; CHECK: br %r14 | ||
| %addr = getelementptr i8, i8 *%base, i64 %index | ||
| %ptr = bitcast i8 *%addr to <4 x i32> * | ||
| %load = load <4 x i32>, <4 x i32> *%ptr | ||
| %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) | ||
| ret <4 x i32> %ret | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| ; Test stores of byte-swapped vector elements. | ||
| ; | ||
| ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s | ||
|
|
||
| declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) | ||
| declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) | ||
| declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) | ||
|
|
||
| ; Test v8i16 stores: a bswap followed by a store is expected to become | ||
| ; VSTBRH. | ||
| define void @f1(<8 x i16> %val, <8 x i16> *%ptr) { | ||
| ; CHECK-LABEL: f1: | ||
| ; CHECK: vstbrh %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val) | ||
| store <8 x i16> %swap, <8 x i16> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Test v4i32 stores: a bswap followed by a store is expected to become | ||
| ; VSTBRF. | ||
| define void @f2(<4 x i32> %val, <4 x i32> *%ptr) { | ||
| ; CHECK-LABEL: f2: | ||
| ; CHECK: vstbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Test v2i64 stores: a bswap followed by a store is expected to become | ||
| ; VSTBRG. | ||
| define void @f3(<2 x i64> %val, <2 x i64> *%ptr) { | ||
| ; CHECK-LABEL: f3: | ||
| ; CHECK: vstbrg %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) | ||
| store <2 x i64> %swap, <2 x i64> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Test the highest aligned in-range offset: element 255 of a <4 x i32> array | ||
| ; is expected to be addressed as displacement 4080. | ||
| define void @f4(<4 x i32> %val, <4 x i32> *%base) { | ||
| ; CHECK-LABEL: f4: | ||
| ; CHECK: vstbrf %v24, 4080(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255 | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Test the highest unaligned in-range offset: byte offset 4095 is expected to | ||
| ; be used directly as the displacement. The store is marked `align 1`. | ||
| define void @f5(<4 x i32> %val, i8 *%base) { | ||
| ; CHECK-LABEL: f5: | ||
| ; CHECK: vstbrf %v24, 4095(%r2) | ||
| ; CHECK: br %r14 | ||
| %addr = getelementptr i8, i8 *%base, i64 4095 | ||
| %ptr = bitcast i8 *%addr to <4 x i32> * | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr, align 1 | ||
| ret void | ||
| } | ||
|
|
||
| ; Test the next offset up, which requires separate address logic: the | ||
| ; offset 4096 is expected to be added to the base with AGHI first. | ||
| define void @f6(<4 x i32> %val, <4 x i32> *%base) { | ||
| ; CHECK-LABEL: f6: | ||
| ; CHECK: aghi %r2, 4096 | ||
| ; CHECK: vstbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256 | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Test negative offsets, which also require separate address logic: the | ||
| ; offset -16 is expected to be added to the base with AGHI first. | ||
| define void @f7(<4 x i32> %val, <4 x i32> *%base) { | ||
| ; CHECK-LABEL: f7: | ||
| ; CHECK: aghi %r2, -16 | ||
| ; CHECK: vstbrf %v24, 0(%r2) | ||
| ; CHECK: br %r14 | ||
| %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1 | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr | ||
| ret void | ||
| } | ||
|
|
||
| ; Check that indexes are allowed: a variable byte index is expected to be | ||
| ; folded into the address as an index register. The store is marked | ||
| ; `align 1`. | ||
| define void @f8(<4 x i32> %val, i8 *%base, i64 %index) { | ||
| ; CHECK-LABEL: f8: | ||
| ; CHECK: vstbrf %v24, 0(%r3,%r2) | ||
| ; CHECK: br %r14 | ||
| %addr = getelementptr i8, i8 *%base, i64 %index | ||
| %ptr = bitcast i8 *%addr to <4 x i32> * | ||
| %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) | ||
| store <4 x i32> %swap, <4 x i32> *%ptr, align 1 | ||
| ret void | ||
| } | ||
|
|