8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,11 @@ def G_FCLASS : RISCVGenericInstruction {
let hasSideEffects = false;
}
def : GINodeEquiv<G_FCLASS, riscv_fclass>;

// Pseudo equivalent to a RISCVISD::READ_VLENB.
// Generic opcode that takes no operands and defines one result; the
// instruction-select tests added with it show it selecting to PseudoReadVLENB.
def G_READ_VLENB : RISCVGenericInstruction {
// Single result, typed by generic type index 0.
let OutOperandList = (outs type0:$dst);
// No input operands.
let InOperandList = (ins);
// Declared free of side effects.
let hasSideEffects = false;
}
// Declare G_READ_VLENB as the GlobalISel equivalent of the riscv_read_vlenb
// SelectionDAG node.
def : GINodeEquiv<G_READ_VLENB, riscv_read_vlenb>;
300 changes: 300 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/vscale32.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -mattr=+v,+m -run-pass=instruction-select \
# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s

---
name: test_1_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 3
%0:gprb(s32) = G_LSHR %1, %2(s32)
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_2_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 2
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 2
%0:gprb(s32) = G_LSHR %1, %2(s32)
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_3_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 3
%3:gprb(s32) = G_LSHR %1, %2(s32)
%4:gprb(s32) = G_CONSTANT i32 3
%0:gprb(s32) = G_MUL %3, %4
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_4_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 1
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 1
%0:gprb(s32) = G_LSHR %1, %2(s32)
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_8_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: $x10 = COPY [[PseudoReadVLENB]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s32) = G_READ_VLENB
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_16_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[PseudoReadVLENB]], 1
; CHECK-NEXT: $x10 = COPY [[SLLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 1
%0:gprb(s32) = G_SHL %1, %2(s32)
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_40_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40_s32
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 5
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PseudoReadVLENB]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%1:gprb(s32) = G_READ_VLENB
%2:gprb(s32) = G_CONSTANT i32 5
%0:gprb(s32) = G_MUL %1, %2
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_1_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 1
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_2_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 2
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 2
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_3_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 3
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_4_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 4
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 4
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_8_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 8
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_16_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 16
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
---
name: test_40_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40_s64
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 40
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%17:gprb(s32) = G_READ_VLENB
%18:gprb(s32) = G_CONSTANT i32 3
%2:gprb(s32) = G_LSHR %17, %18(s32)
%15:gprb(s32) = G_CONSTANT i32 40
%9:gprb(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...
139 changes: 139 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/vscale64.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv64 -mattr=+v,+m -run-pass=instruction-select \
# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s

---
name: test_1
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 3
%2:gprb(s64) = G_LSHR %0, %1(s64)
$x10 = COPY %2(s64)
PseudoRET implicit $x10
...
---
name: test_2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 2
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 2
%2:gprb(s64) = G_LSHR %0, %1(s64)
$x10 = COPY %2(s64)
PseudoRET implicit $x10
...
---
name: test_3
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 3
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[SRLI]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 3
%2:gprb(s64) = G_LSHR %0, %1(s64)
%3:gprb(s64) = G_CONSTANT i64 3
%4:gprb(s64) = G_MUL %2, %3
$x10 = COPY %4(s64)
PseudoRET implicit $x10
...
---
name: test_4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[PseudoReadVLENB]], 1
; CHECK-NEXT: $x10 = COPY [[SRLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 1
%2:gprb(s64) = G_LSHR %0, %1(s64)
$x10 = COPY %2(s64)
PseudoRET implicit $x10
...
---
name: test_8
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: $x10 = COPY [[PseudoReadVLENB]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
$x10 = COPY %0(s64)
PseudoRET implicit $x10
...
---
name: test_16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[PseudoReadVLENB]], 1
; CHECK-NEXT: $x10 = COPY [[SLLI]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 1
%2:gprb(s64) = G_SHL %0, %1(s64)
$x10 = COPY %2(s64)
PseudoRET implicit $x10
...
---
name: test_40
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40
; CHECK: [[PseudoReadVLENB:%[0-9]+]]:gpr = PseudoReadVLENB
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 5
; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PseudoReadVLENB]], [[ADDI]]
; CHECK-NEXT: $x10 = COPY [[MUL]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(s64) = G_READ_VLENB
%1:gprb(s64) = G_CONSTANT i64 5
%2:gprb(s64) = G_MUL %0, %1
$x10 = COPY %2(s64)
PseudoRET implicit $x10
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -mattr=+v,+m -run-pass=legalizer %s -o - | FileCheck %s

---
name: test_1_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 1
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_2_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 2
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_3_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 3
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_4_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 4
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_8_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 8
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_16_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: $x10 = COPY [[SHL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 16
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_40_s32
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[READ_VLENB]], [[C]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s32) = G_VSCALE i32 40
$x10 = COPY %0
PseudoRET implicit $x10
...

---
name: test_1_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 1
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_2_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 2
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_3_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 3
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_4_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 4
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_8_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 8
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_16_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 16
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
---
name: test_40_s64
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 40
%1:_(s32) = G_TRUNC %0
$x10 = COPY %1
PseudoRET implicit $x10
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv64 -mattr=+v,+m -run-pass=legalizer %s -o - | FileCheck %s

---
name: test_1
body: |
bb.0.entry:
; CHECK-LABEL: name: test_1
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 1
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_2
body: |
bb.0.entry:
; CHECK-LABEL: name: test_2
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 2
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_3
body: |
bb.0.entry:
; CHECK-LABEL: name: test_3
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 3
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_4
body: |
bb.0.entry:
; CHECK-LABEL: name: test_4
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 4
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_8
body: |
bb.0.entry:
; CHECK-LABEL: name: test_8
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 8
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_16
body: |
bb.0.entry:
; CHECK-LABEL: name: test_16
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: $x10 = COPY [[SHL]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 16
$x10 = COPY %0
PseudoRET implicit $x10
...
---
name: test_40
body: |
bb.0.entry:
; CHECK-LABEL: name: test_40
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[READ_VLENB]], [[C]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%0:_(s64) = G_VSCALE i64 40
$x10 = COPY %0
PseudoRET implicit $x10
...


Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \
# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
# RUN: -o - | FileCheck %s

---
name: test_s32
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:gprb(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:gprb(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%1:_(s32) = G_READ_VLENB
%2:_(s32) = G_CONSTANT i32 3
%0:_(s32) = G_LSHR %1, %2(s32)
$x10 = COPY %0(s32)
PseudoRET implicit $x10
...
---
name: test_s64
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test_s64
; CHECK: [[READ_VLENB:%[0-9]+]]:gprb(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:gprb(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[MUL:%[0-9]+]]:gprb(s32) = G_MUL [[LSHR]], [[C1]]
; CHECK-NEXT: $x10 = COPY [[MUL]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
%17:_(s32) = G_READ_VLENB
%18:_(s32) = G_CONSTANT i32 3
%2:_(s32) = G_LSHR %17, %18(s32)
%15:_(s32) = G_CONSTANT i32 1
%9:_(s32) = G_MUL %2, %15
$x10 = COPY %9(s32)
PseudoRET implicit $x10
...

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \
# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
# RUN: -o - | FileCheck %s

---
name: test
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
; CHECK-LABEL: name: test
; CHECK: [[READ_VLENB:%[0-9]+]]:gprb(s64) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:gprb(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:gprb(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
%1:_(s64) = G_READ_VLENB
%2:_(s64) = G_CONSTANT i64 3
%0:_(s64) = G_LSHR %1, %2(s64)
$x10 = COPY %0(s64)
PseudoRET implicit $x10
...

4 changes: 4 additions & 0 deletions mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,10 @@ class ValueBoundsConstraintSet
/// Return an expression that represents a constant.
AffineExpr getExpr(int64_t constant);

/// Debugging only: Dump the constraint set and the column-to-value/dim
/// mapping to llvm::errs.
void dump() const;

protected:
/// Dimension identifier to indicate a value is index-typed. This is used for
/// internal data structures/API only.
Expand Down
4 changes: 4 additions & 0 deletions mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ bool mlir::tensor::preservesStaticInformation(Type source, Type target) {
if (sourceType.getRank() != targetType.getRank())
return false;

// Requires same encoding.
if (sourceType.getEncoding() != targetType.getEncoding())
return false;

// If cast is towards more static sizes along any dimension, don't fold.
for (auto t : llvm::zip(sourceType.getShape(), targetType.getShape())) {
if (!ShapedType::isDynamic(std::get<0>(t)) &&
Expand Down
29 changes: 29 additions & 0 deletions mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,35 @@ ValueBoundsConstraintSet::areEquivalentSlices(MLIRContext *ctx,
return true;
}

void ValueBoundsConstraintSet::dump() const {
llvm::errs() << "==========\nColumns:\n";
llvm::errs() << "(column\tdim\tvalue)\n";
for (auto [index, valueDim] : llvm::enumerate(positionToValueDim)) {
llvm::errs() << " " << index << "\t";
if (valueDim) {
if (valueDim->second == kIndexValue) {
llvm::errs() << "n/a\t";
} else {
llvm::errs() << valueDim->second << "\t";
}
llvm::errs() << getOwnerOfValue(valueDim->first)->getName() << " ";
if (OpResult result = dyn_cast<OpResult>(valueDim->first)) {
llvm::errs() << "(result " << result.getResultNumber() << ")";
} else {
llvm::errs() << "(bbarg "
<< cast<BlockArgument>(valueDim->first).getArgNumber()
<< ")";
}
llvm::errs() << "\n";
} else {
llvm::errs() << "n/a\tn/a\n";
}
}
llvm::errs() << "\nConstraint set:\n";
cstr.dump();
llvm::errs() << "==========\n";
}

ValueBoundsConstraintSet::BoundBuilder &
ValueBoundsConstraintSet::BoundBuilder::operator[](int64_t dim) {
assert(!this->dim.has_value() && "dim was already set");
Expand Down
47 changes: 47 additions & 0 deletions mlir/test/Dialect/SparseTensor/no_fold_into_consumer.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// RUN: mlir-opt %s --canonicalize --pre-sparsification-rewrite | FileCheck %s

#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>

#sparse = #sparse_tensor.encoding<{
map = (d0, d1, d2) ->
(d0 : compressed(nonunique),
d1 : singleton(nonunique, soa),
d2 : singleton(soa)),
posWidth = 64,
crdWidth = 64
}>


module {
//
// This IR should not end up in an infinite loop trying to fold
// the linalg producer into the tensor cast consumer (even though
// static sizes can fold, the different encodings cannot). The
// cast was sloppy to begin with (but it has been observed by
// external sources) and can be easily repaired by the sparsifier.
//
// CHECK-LABEL: func @avoid_fold
// CHECK: arith.constant
// CHECK: tensor.empty()
// CHECK: linalg.generic
// CHECK: sparse_tensor.convert
// CHECK: return
//
func.func @avoid_fold(%0: tensor<10x20x30xf64, #sparse>) -> tensor<10x20x30xf64, #sparse> {
%1 = tensor.empty() : tensor<10x20x30xf64>
%2 = linalg.generic { indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
}
ins (%0 : tensor<10x20x30xf64, #sparse>)
outs(%1 : tensor<10x20x30xf64>) {
^bb0(%in: f64, %out: f64):
%cst = arith.constant 0.000000e+00 : f64
%4 = arith.cmpf ugt, %in, %cst : f64
%5 = arith.select %4, %in, %cst : f64
linalg.yield %5 : f64
} -> tensor<10x20x30xf64>
%cast = tensor.cast %2 : tensor<10x20x30xf64> to tensor<10x20x30xf64, #sparse>
return %cast : tensor<10x20x30xf64, #sparse>
}
}

11 changes: 3 additions & 8 deletions openmp/runtime/src/kmp_collapse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1517,16 +1517,11 @@ void kmp_handle_upper_triangle_matrix(
kmp_uint64 iter_with_current = iter_before_current + iter_current;
// calculate the outer loop lower bound (lbo) which is the max outer iv value
// that gives the number of iterations that is equal or just below the total
// number of iterations executed by the previous threads, for less_than
// (1-based) inner loops (inner_ub0 == -1) it will be i.e.
// lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0
// for less_than_equal (0-based) inner loops (inner_ub == 0) it will be:
// i.e. lbo*(lbo+1)/2<=iter_before_current =>
// lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarily
// using a parameter to control the equatio sign
// number of iterations executed by the previous threads:
// lbo*(lbo+1)/2<=iter_before_current =>
// lbo^2+lbo-2*iter_before_current<=0
kmp_uint64 lower_bound_outer =
(kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_before_current) + 1) / 2 - 1;
;
// calculate the inner loop lower bound which is the remaining number of
// iterations required to hit the total number of iterations executed by the
// previous threads giving the starting point of this thread
Expand Down
201 changes: 201 additions & 0 deletions openmp/runtime/test/worksharing/for/collapse_test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#include <omp.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

#define LOOP_IV_TYPE0 LOOP_TYPES
#define LOOP_TYPE0 LOOP_TYPES
#define LOOP_STYPE0 LOOP_TYPES

#define LOOP_IV_TYPE1 LOOP_TYPES
#define LOOP_TYPE1 LOOP_TYPES
#define LOOP_STYPE1 LOOP_TYPES

#define LOOP_IV_TYPE2 LOOP_TYPES
#define LOOP_TYPE2 LOOP_TYPES
#define LOOP_STYPE2 LOOP_TYPES

#define MAX_THREADS 256

// Diagnostic output helper. With VERBOSE defined it is plain printf; otherwise
// a PRINTF(...) call expands to a no-op and its arguments are discarded (not
// evaluated), instead of leaving a bare parenthesized comma expression behind.
#if defined VERBOSE
#define PRINTF printf
#else
#define PRINTF(...) ((void)0)
#endif

LOOP_TYPE0 iLB, iUB;
LOOP_TYPE1 jA0, jB0;
LOOP_TYPE2 kA0, kB0;

LOOP_STYPE0 iStep;
LOOP_STYPE1 jA1, jB1, jStep;
LOOP_STYPE2 kA1, kB1, kStep;

// We can check <=, <, >=, > (!= has different pattern)
// Additional definition of LOOP_LEi, LOOP_LTi, etc. is helpful to build calls
// of the test from main

#if defined LOOP_LE0
#define COMPARE0 <=
#elif defined LOOP_LT0
#define COMPARE0 <
#elif defined LOOP_GE0
#define COMPARE0 >=
#elif defined LOOP_GT0
#define COMPARE0 >
#endif

#if defined LOOP_LE1
#define COMPARE1 <=
#elif defined LOOP_LT1
#define COMPARE1 <
#elif defined LOOP_GE1
#define COMPARE1 >=
#elif defined LOOP_GT1
#define COMPARE1 >
#endif

#if defined LOOP_LE2
#define COMPARE2 <=
#elif defined LOOP_LT2
#define COMPARE2 <
#elif defined LOOP_GE2
#define COMPARE2 >=
#elif defined LOOP_GT2
#define COMPARE2 >
#endif

// One recorded iteration of the loop nest: the values of the three induction
// variables, as written by Set().
typedef struct {
LOOP_IV_TYPE0 i;
LOOP_IV_TYPE1 j;
LOOP_IV_TYPE2 k;
} spaceType;

// Allocate a zero-initialized array of `size` iteration records.
// On allocation failure the test is terminated with an error message rather
// than writing through a null pointer.
spaceType *AllocSpace(unsigned size) {
  // calloc zero-fills the storage and guards the size * sizeof multiplication.
  spaceType *p = (spaceType *)calloc(size, sizeof(spaceType));
  // A null result is only an error when a non-empty array was requested.
  if (p == NULL && size != 0) {
    fprintf(stderr, "AllocSpace: failed to allocate %u records\n", size);
    exit(EXIT_FAILURE);
  }
  return p;
}

// Releases an iteration-record array obtained from AllocSpace().
void FreeSpace(spaceType *space) {
  free(space);
}

// record an iteration
void Set(spaceType *space, unsigned count, unsigned trueCount, LOOP_IV_TYPE0 i,
LOOP_IV_TYPE1 j, LOOP_IV_TYPE0 k) {
if (count > trueCount) {
// number of iterations exceeded
// will be reported with checks
return;
}
space[count - 1].i = i;
space[count - 1].j = j;
space[count - 1].k = k;
}
// Runs the LOOP nest serially and under "omp for collapse(3)" and verifies
// that the OpenMP run executed the same set of iterations.
//
// The loop shape comes from the LOOP macro of the including file, which reads
// the file-level loop parameter globals. Diagnostics are emitted via PRINTF.
//
// Returns 1 if all checks pass and 0 otherwise.
int test() {
  int pass = 1;
  LOOP_IV_TYPE0 i;
  LOOP_IV_TYPE1 j;
  LOOP_IV_TYPE2 k;

  spaceType *openmpSpace;
  spaceType *scalarSpace;

  unsigned trueCount = 0;
  unsigned openmpCount = 0;
  unsigned scalarCount = 0;
  unsigned uselessThreadsOpenMP = 0;
  unsigned chunkSizesOpenmp[MAX_THREADS] = {0};

  // Clamp the team size so that every thread number is a valid index into
  // chunkSizesOpenmp.
  unsigned num_threads = omp_get_max_threads();
  if (num_threads > MAX_THREADS)
    num_threads = MAX_THREADS;
  omp_set_num_threads(num_threads);

  // count iterations and allocate space
  LOOP { ++trueCount; }

  openmpSpace = AllocSpace(trueCount);
  scalarSpace = AllocSpace(trueCount);
  // With zero iterations nothing is ever stored, so a NULL result is only an
  // error when there is something to record.
  if (trueCount > 0 && (openmpSpace == NULL || scalarSpace == NULL)) {
    PRINTF("OpenMP FAILURE: cannot allocate space for %u iterations\n",
           trueCount);
    FreeSpace(openmpSpace);
    FreeSpace(scalarSpace);
    return 0;
  }

  // fill the scalar (compare) space
  LOOP {
    ++scalarCount;
    Set(scalarSpace, scalarCount, trueCount, i, j, k);
  }

  // test run body:
  // perform and record OpenMP iterations and thread use
#pragma omp parallel num_threads(num_threads)
  {
#pragma omp for collapse(3) private(i, j, k)
    LOOP {
      unsigned count;
      unsigned gtid = omp_get_thread_num();
#pragma omp atomic update
      ++chunkSizesOpenmp[gtid];
      // The captured value gives each iteration a unique 1-based slot.
#pragma omp atomic capture
      count = ++openmpCount;
      Set(openmpSpace, count, trueCount, i, j, k);
    }
  }

  // check for the right number of iterations processed
  // (Set() drops any records beyond trueCount, so an excess shows up only in
  // the counter)
  if (openmpCount < trueCount) {
    PRINTF("OpenMP FAILURE: Openmp processed fewer iterations: %u vs %u\n",
           openmpCount, trueCount);
    pass = 0;
  } else if (openmpCount > trueCount) {
    PRINTF("OpenMP FAILURE: Openmp processed more iterations: %u vs %u\n",
           openmpCount, trueCount);
    pass = 0;
  }

  // check OpenMP for iteration correctness against scalar: every serially
  // recorded iteration must appear somewhere in the OpenMP record
  for (unsigned s = 0; s < trueCount; s++) {
    unsigned o;
    for (o = 0; o < openmpCount; o++) {
      if ((scalarSpace[s].i == openmpSpace[o].i) &&
          (scalarSpace[s].j == openmpSpace[o].j) &&
          (scalarSpace[s].k == openmpSpace[o].k)) {
        break;
      }
    }
    if (o == openmpCount) {
      PRINTF("OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[s].i,
             scalarSpace[s].j, scalarSpace[s].k);
      pass = 0;
    }
  }

  // check for efficient thread use
  for (unsigned t = 0; t < num_threads; ++t) {
    if (chunkSizesOpenmp[t] == 0) {
      ++uselessThreadsOpenMP;
    }
  }

  // a check to see if at least more than one thread was used (weakish);
  // skipped when only one thread is available, because then a single busy
  // thread is the only possible outcome and not a failure
  if ((num_threads > 1) && (uselessThreadsOpenMP == num_threads - 1) &&
      (trueCount > 1)) {
    PRINTF("OpenMP FAILURE: threads are not used\n");
    pass = 0;
  }

#if 0
  // a check to see if the load was spread more or less evenly so that
  // when there was more work than threads each one got at least something
  // (stronger, but may currently fail for a general collapse case)
  if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) {
    PRINTF("OpenMP FAILURE: %u threads not used with %u iterations\n",
           uselessThreadsOpenMP, openmpCount);
    pass = 0;
  }
#endif

  // clean up space
  FreeSpace(openmpSpace);
  FreeSpace(scalarSpace);
  return pass;
}
65 changes: 65 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_collapse_many_GELTGT_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// RUN: %libomp-compile-and-run

// Non-rectangular loop collapsing.
//
// Nested loops conform to OpenMP 5.2 standard,
// inner loops bounds may depend on outer loops induction variables.

// Configuration consumed by collapse_test.inc, which must be included after
// these definitions.
//   LOOP_TYPES  - the single type used for induction variables, bounds, steps
//   COMPARE0..2 - relational operator of the i, j and k loop: >=, <, >
#define LOOP_TYPES int
#define COMPARE0 >=
#define COMPARE1 <
#define COMPARE2 >
// LOOP is the header of the 3-level loop nest under test. In this file the
// inner bounds are the plain globals jA0/jB0 and kA0/kB0; they do not
// reference the enclosing induction variables, so jA1, jB1, kA1 and kB1 are
// assigned and printed by main() but do not affect the loops.
#define LOOP \
for (i = iLB; i COMPARE0 iUB; i += iStep) \
for (j = jA0; j COMPARE1 jB0; j += jStep) \
for (k = kA0; k COMPARE2 kB0; k += kStep)
#include "collapse_test.inc"

// Test driver for the (>=, <, >) comparison combination.
//
// Runs one fixed configuration first and, if it passes, sweeps a grid of
// steps and bounds. Returns 0 when every configuration passes and 1 as soon
// as one fails (later configurations are then only printed, not run).
int main() {
  int fail;

  // One-off configuration.
  iLB = 3;
  iUB = -2;
  jA0 = -3;
  jA1 = 0;
  jB0 = -6;
  jB1 = 0;
  kA0 = -2;
  kA1 = 0;
  kB0 = -4;
  kB1 = 0;
  iStep = -1;
  // The j loop tests "j < jB0", so its increment must move j upwards to be a
  // conforming OpenMP canonical loop; a negative step here was
  // non-conforming. jA0 is not below jB0, so the j loop still runs zero
  // times.
  jStep = 1;
  kStep = -4;
  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
  fail = (test() == 0);
  if (fail)
    return fail;

  // Parameter sweep; iLB and iUB keep their one-off values. Step signs match
  // the comparison of each level: i and k count down, j counts up.
  for (iStep = -3; iStep >= -6; iStep -= 2) {
    for (jA0 = -6; jA0 <= 6; jA0 += 3) {
      for (jB0 = -3; jB0 <= 10; jB0 += 3) {
        for (jStep = 1; jStep <= 10; jStep += 2) {
          for (kA0 = -2; kA0 <= 4; ++kA0) {
            for (kB0 = -4; kB0 <= 2; ++kB0) {
              for (kStep = -2; kStep >= -10; kStep -= 4) {
                PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
                       "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
                       "jStep=%d; kStep=%d;\n",
                       iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
                       iStep, jStep, kStep);
                // Short-circuit: test() is not run again after a failure.
                fail = fail || (test() == 0);
              }
            }
          }
        }
      }
    }
  }

  return fail;
}
71 changes: 71 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_collapse_many_GTGEGT_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: %libomp-compile-and-run

// Non-rectangular loop collapsing.
//
// Nested loops conform to OpenMP 5.2 standard,
// inner loops bounds may depend on outer loops induction variables.

// Configuration consumed by collapse_test.inc, which must be included after
// these definitions.
//   LOOP_TYPES  - the single type used for induction variables, bounds, steps
//   COMPARE0..2 - relational operator of the i, j and k loop: >, >=, >
#define LOOP_TYPES int
#define COMPARE0 >
#define COMPARE1 >=
#define COMPARE2 >

// collapse_test.inc tests LOOP_GT0, LOOP_GE1, LOOP_GT2 (no leading D), so the
// three DLOOP_* definitions below have no effect on it.
// NOTE(review): they look like leftovers of -DLOOP_GT0 style compiler flags;
// confirm before removing.
#define DLOOP_GT0
#define DLOOP_GE1
#define DLOOP_GT2

// LOOP is the header of the 3-level loop nest under test. In this file the
// inner bounds are the plain globals jA0/jB0 and kA0/kB0; they do not
// reference the enclosing induction variables.
#define LOOP \
for (i = iLB; i COMPARE0 iUB; i += iStep) \
for (j = jA0; j COMPARE1 jB0; j += jStep) \
for (k = kA0; k COMPARE2 kB0; k += kStep)
#include "collapse_test.inc"

// Test driver for the (>, >=, >) comparison combination: one fixed
// configuration followed, if it passes, by a sweep over steps and bounds.
// Returns 0 when everything passes, 1 otherwise.
int main() {
  int fail;

  // One-off configuration: all three loops count downwards.
  iLB = 3;
  iUB = -2;
  iStep = -1;

  jA0 = -3;
  jA1 = 0;
  jB0 = -6;
  jB1 = 0;
  jStep = -1;

  kA0 = -2;
  kA1 = 0;
  kB0 = -4;
  kB1 = 0;
  kStep = -4;

  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
  fail = (test() == 0);
  if (fail)
    return fail;

  // Parameter sweep; iLB and iUB keep their one-off values.
  for (iStep = -3; iStep >= -6; iStep -= 2)
    for (jA0 = -3; jA0 <= 10; jA0 += 3)
      for (jB0 = -6; jB0 <= 6; jB0 += 3)
        for (jStep = -1; jStep >= -10; jStep -= 2)
          for (kA0 = -2; kA0 <= 4; ++kA0)
            for (kB0 = -4; kB0 <= 2; ++kB0)
              for (kStep = -2; kStep >= -10; kStep -= 4) {
                PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
                       "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
                       "jStep=%d; kStep=%d;\n",
                       iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
                       iStep, jStep, kStep);
                // After the first failure test() is no longer invoked.
                if (!fail && test() == 0)
                  fail = 1;
              }

  return fail;
}
66 changes: 66 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_collapse_many_LTLEGE_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// RUN: %libomp-compile-and-run

// Non-rectangular loop collapsing.
//
// Nested loops conform to OpenMP 5.2 standard,
// inner loops bounds may depend on outer loops induction variables.

// Configuration consumed by collapse_test.inc, which must be included after
// these definitions.
//   LOOP_TYPES  - the single type used for induction variables, bounds, steps
//   COMPARE0..2 - relational operator of the i, j and k loop: <, <=, >=
#define LOOP_TYPES int
#define COMPARE0 <
#define COMPARE1 <=
#define COMPARE2 >=
// LOOP is the header of the 3-level loop nest under test. In this file the
// inner bounds are the plain globals jA0/jB0 and kA0/kB0; they do not
// reference the enclosing induction variables.
#define LOOP \
for (i = iLB; i COMPARE0 iUB; i += iStep) \
for (j = jA0; j COMPARE1 jB0; j += jStep) \
for (k = kA0; k COMPARE2 kB0; k += kStep)
#include "collapse_test.inc"

// Test driver for the (<, <=, >=) comparison combination.
//
// Runs one fixed configuration first and, if it passes, sweeps a grid of
// steps and bounds. Returns 0 when every configuration passes and 1 as soon
// as one fails (later configurations are then only printed, not run).
int main() {
  int fail;

  // One-off configuration.
  iLB = -2;
  iUB = 3;
  jA0 = -3;
  jA1 = 0;
  jB0 = -6;
  jB1 = 0;
  kA0 = -2;
  kA1 = 0;
  kB0 = -4;
  kB1 = 0;
  // The i loop tests "i < iUB" starting below iUB, so it must count upwards:
  // with a negative step the serial loop never reaches its bound and only
  // ends through signed overflow (undefined behavior), and the loop is not a
  // conforming OpenMP canonical loop. The same direction rule applies to the
  // "j <= jB0" loop. jA0 is above jB0, so the nest still has zero iterations.
  iStep = 1;
  jStep = 1;
  kStep = -4;
  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
  fail = (test() == 0);

  if (!fail) {
    // Parameter sweep; iLB and iUB keep their one-off values. Step signs
    // match the comparison of each level: i and j count up, k counts down.
    for (iStep = 2; iStep <= 6; iStep += 2) {
      for (jA0 = -6; jA0 <= 6; jA0 += 3) {
        for (jB0 = -3; jB0 <= 10; jB0 += 3) {
          for (jStep = 1; jStep <= 10; jStep += 2) {
            for (kA0 = -2; kA0 <= 4; ++kA0) {
              for (kB0 = -4; kB0 <= 2; ++kB0) {
                for (kStep = -2; kStep >= -10; kStep -= 4) {
                  PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
                         "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
                         "jStep=%d; kStep=%d;\n",
                         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
                         iStep, jStep, kStep);
                  // Short-circuit: test() is not run again after a failure.
                  fail = fail || (test() == 0);
                }
              }
            }
          }
        }
      }
    }
  }

  return fail;
}
73 changes: 73 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_collapse_many_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// RUN: %libomp-compile-and-run
// XFAIL: true

// Non-rectangular loop collapsing.
//
// Nested loops conform to OpenMP 5.2 standard,
// inner loops bounds may depend on outer loops induction variables.

// Configuration consumed by collapse_test.inc, which must be included after
// these definitions. LOOP_TYPES is the single type used for induction
// variables, bounds and steps.
#define LOOP_TYPES int
// LOOP is the header of the 3-level loop nest under test. All levels use <=.
// The j bounds are linear in i (i * jA1 + jA0 .. i * jB1 + jB0) and the k
// bounds are linear in j (j * kA1 + kA0 .. j * kB1 + kB0).
#define LOOP \
for (i = iLB; i <= iUB; i += iStep) \
for (j = i * jA1 + jA0; j <= i * jB1 + jB0; j += jStep) \
for (k = j * kA1 + kA0; k <= j * kB1 + kB0; k += kStep)
#include "collapse_test.inc"

// Test driver for the general case where inner bounds are linear functions of
// the enclosing induction variable: one fixed configuration followed, if it
// passes, by a sweep over the bound multipliers and the steps.
// Returns 0 when everything passes, 1 otherwise.
int main() {
  int fail;

  // One-off configuration.
  iLB = -2;
  iUB = 3;
  iStep = 5;

  jA0 = -7;
  jA1 = -1;
  jB0 = 13;
  jB1 = 3;
  jStep = 9;

  kA0 = -20;
  kA1 = -2;
  kB0 = 111;
  kB1 = -1;
  kStep = 10;

  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
  fail = (test() == 0);
  if (fail)
    return fail;

  // NOTE: if a loop on some level won't execute for all iterations of an
  // outer loop, it still should work. Runtime doesn't require lower bounds to
  // be <= upper bounds for all possible i, j, k.

  // Constant terms stay fixed for the whole sweep.
  iLB = -2;
  iUB = 3;
  jA0 = -7;
  jB0 = 5;
  kA0 = -13;
  kB0 = 37;

  for (kA1 = -2; kA1 <= 2; ++kA1)
    for (kB1 = -2; kB1 <= 2; ++kB1)
      for (jA1 = -3; jA1 <= 3; ++jA1)
        for (jB1 = -3; jB1 <= 3; ++jB1)
          for (iStep = 1; iStep <= 3; ++iStep)
            for (jStep = 2; jStep <= 6; jStep += 2)
              for (kStep = 2; kStep <= 8; kStep += 3) {
                PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
                       "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
                       "jStep=%d; kStep=%d;\n",
                       iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
                       iStep, jStep, kStep);
                // After the first failure test() is no longer invoked.
                if (!fail && test() == 0)
                  fail = 1;
              }

  return fail;
}
32 changes: 32 additions & 0 deletions openmp/runtime/test/worksharing/for/omp_collapse_one_int.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// RUN: %libomp-compile-and-run

// Non-rectangular loop collapsing.
//
// Nested loops conform to OpenMP 5.2 standard,
// inner loops bounds may depend on outer loops induction variables.

// Configuration consumed by collapse_test.inc, which must be included after
// these definitions. LOOP_TYPES is the single type used for induction
// variables, bounds and steps.
#define LOOP_TYPES int
// LOOP is the header of the 3-level loop nest under test. All levels use <=.
// The j bounds are offsets from i (i + jA0 .. i + jB0) and the k bounds are
// offsets from j (j + kA0 .. j + kB0); jA1, jB1, kA1 and kB1 are not used.
#define LOOP \
for (i = iLB; i <= iUB; i += iStep) \
for (j = i + jA0; j <= i + jB0; j += jStep) \
for (k = j + kA0; k <= j + kB0; k += kStep)

#include "collapse_test.inc"

// Test driver running a single fixed configuration of the loop nest.
// Returns 0 when test() passes and 1 when it fails.
int main() {
  // Outermost loop.
  iLB = -2;
  iUB = 3;
  iStep = 5;

  // Middle loop: offsets relative to i.
  jA0 = -7;
  jB0 = 13;
  jStep = 9;

  // Innermost loop: offsets relative to j.
  kA0 = -20;
  kB0 = 111;
  kStep = 10;

  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jB0=%d; kA0=%d; kB0=%d; iStep=%d; "
         "jStep=%d; kStep=%d;\n",
         iLB, iUB, jA0, jB0, kA0, kB0, iStep, jStep, kStep);

  // test() returns 0 on failure, which maps to a non-zero exit status.
  return test() == 0;
}