Skip to content

Commit c253e5c

Browse files
authored
[Exegesis][RISCV] Add initial RVV support (#128767)
This patch adds initial vector extension support to RISC-V's exegesis. The strategy here is to enumerate all RVV _pseudo_ opcodes, as their MC opcode counterparts are of little use in this context. We also enumerate all possible VTYPE operands in each CodeTemplate configuration. Various MachineFunction passes are used for post-processing the snippets, such as inserting VSETVLI instructions. See https://llvm.org/devmtg/2024-10/slides/techtalk/Hsu-RVV-Exegesis.pdf for more technical details.
1 parent b923f6c commit c253e5c

22 files changed

+1137
-52
lines changed

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,44 @@ enum RoundingMode {
432432
RNE = 1,
433433
RDN = 2,
434434
ROD = 3,
435+
Invalid
435436
};
437+
438+
/// Returns the lowercase textual name ("rnu", "rne", "rdn" or "rod") of the
/// given vector fixed-point rounding mode.
///
/// \param RndMode the rounding mode to convert; must be one of RNU, RNE, RDN
///        or ROD.
inline static StringRef roundingModeToString(RoundingMode RndMode) {
439+
switch (RndMode) {
440+
default:
441+
// Any enumerator without an explicit case below (e.g. Invalid) is treated as
// a programming error rather than being given a name.
llvm_unreachable("Unknown vector fixed-point rounding mode");
442+
case RISCVVXRndMode::RNU:
443+
return "rnu";
444+
case RISCVVXRndMode::RNE:
445+
return "rne";
446+
case RISCVVXRndMode::RDN:
447+
return "rdn";
448+
case RISCVVXRndMode::ROD:
449+
return "rod";
450+
}
451+
}
452+
453+
/// Converts a rounding mode name into the corresponding RoundingMode value.
///
/// \param Str the name to look up; the recognized spellings are "rnu", "rne",
///        "rdn" and "rod".
/// \returns the matching enumerator, or RISCVVXRndMode::Invalid when \p Str is
///          none of the recognized spellings.
inline static RoundingMode stringToRoundingMode(StringRef Str) {
454+
return StringSwitch<RoundingMode>(Str)
455+
.Case("rnu", RISCVVXRndMode::RNU)
456+
.Case("rne", RISCVVXRndMode::RNE)
457+
.Case("rdn", RISCVVXRndMode::RDN)
458+
.Case("rod", RISCVVXRndMode::ROD)
459+
// Unrecognized names are reported through the Invalid sentinel instead of
// asserting, so callers can handle bad input themselves.
.Default(RISCVVXRndMode::Invalid);
460+
}
461+
462+
/// Checks whether a raw integer denotes one of the four named rounding modes.
///
/// \param Mode the raw value to test.
/// \returns true if \p Mode equals RNU, RNE, RDN or ROD; false for every other
///          value, which includes the Invalid enumerator.
inline static bool isValidRoundingMode(unsigned Mode) {
463+
switch (Mode) {
464+
default:
465+
return false;
466+
// The four accepted modes share a single return below.
case RISCVVXRndMode::RNU:
467+
case RISCVVXRndMode::RNE:
468+
case RISCVVXRndMode::RDN:
469+
case RISCVVXRndMode::ROD:
470+
return true;
471+
}
472+
}
436473
} // namespace RISCVVXRndMode
437474

438475
//===----------------------------------------------------------------------===//
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
2+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
3+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
4+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
5+
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
6+
# RUN: --min-instructions=100 | \
7+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT1
8+
9+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
10+
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
11+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
12+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
13+
# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
14+
# RUN: --min-instructions=100 | \
15+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT2
16+
17+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
18+
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
19+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
20+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
21+
# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
22+
# RUN: --min-instructions=100 | \
23+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT3
24+
25+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
26+
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
27+
# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
28+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
29+
# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
30+
# RUN: --min-instructions=100 | \
31+
# RUN: FileCheck %s --check-prefix=RTHROUGHPUT4
32+
33+
# These instructions are only eligible under the inverse throughput mode.
34+
35+
# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
36+
# LATENCY-NOT: PseudoVCPOP_M_B32
37+
# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1
38+
# LATENCY-NOT: PseudoVRGATHER_VI_M2
39+
# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32
40+
# LATENCY-NOT: PseudoVRGATHER_VX_M4
41+
# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1
42+
# LATENCY-NOT: PseudoVSLIDEUP_VI_M2
43+
# LATENCY-NOT: PseudoVSLIDEUP_VX_M2
44+
# LATENCY-NOT: PseudoVNCLIPU_WI_M2
45+
# LATENCY-NOT: PseudoVNSRA_WI_M2
46+
# LATENCY-NOT: PseudoVNSRL_WI_M2
47+
48+
# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8
49+
# RTHROUGHPUT1: PseudoVCPOP_M_B32
50+
# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1
51+
# RTHROUGHPUT2: PseudoVRGATHER_VI_M2
52+
# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32
53+
# RTHROUGHPUT2: PseudoVRGATHER_VX_M4
54+
# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1
55+
# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2
56+
# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2
57+
# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2
58+
# RTHROUGHPUT4: PseudoVNSRA_WI_M2
59+
# RTHROUGHPUT4: PseudoVNSRL_WI_M2
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
2+
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
3+
4+
# Make sure none of the configs has a SEW other than e32
5+
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
6+
# CHECK: SEW: e32
7+
# CHECK-NOT: SEW: e{{(8|16|64)}}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
2+
# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
3+
# The filter regex above only admits e8 and e16, so no e32/e64 config may appear.
4+
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
5+
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
6+
# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e{{(32|64)}}, Policy: ta/mu}'
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
2+
# RUN: FileCheck %s
3+
4+
# Make sure reduction ops don't alias vd with vs1
5+
# CHECK: instructions:
6+
# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
7+
# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
2+
# RUN: FileCheck %s
3+
4+
# Make sure all def / use operands are the same in latency mode.
5+
# CHECK: instructions:
6+
# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
2+
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VX
3+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
4+
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FP
5+
6+
# VX: PseudoVAADDU_VV_M1
7+
# VX: VXRM: rnu
8+
# VX-NOT: VXRM: {{(rne|rdn|rod)}}
9+
10+
# FP: PseudoVFADD_VFPR16_M1_E16
11+
# FP: FRM: dyn
12+
# FP-NOT: FRM: {{(rtz|rdn|rup|rmm|rne)}}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
2+
# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
3+
# RUN: FileCheck %s --check-prefix=ZVK
4+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
5+
# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
6+
# RUN: FileCheck %s --check-prefix=ZVK
7+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
8+
# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
9+
# RUN: FileCheck %s --check-prefix=ZVK
10+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
11+
# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
12+
# RUN: FileCheck %s --check-prefix=ZVK
13+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
14+
# RUN: --opcode-name=PseudoVSHA2MS_VV_M1_E32 --max-configs-per-opcode=1000 --min-instructions=100 | \
15+
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
16+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
17+
# RUN: --opcode-name=PseudoVSHA2MS_VV_M2_E64 --max-configs-per-opcode=1000 --min-instructions=100 | \
18+
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
19+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
20+
# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
21+
# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY
22+
23+
# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
24+
# ZVK-NOT: SEW: e{{(8|16)}}
25+
# ZVK: SEW: e32
26+
# ZVK-NOT: SEW: e64
27+
28+
# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)
29+
30+
# ZVKNH-NOT: SEW: e{{(8|16)}}
31+
# ZVKNH: SEW: e{{(32|64)}}
32+
33+
# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
2+
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
3+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
4+
# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
5+
# RUN: FileCheck %s --check-prefix=FP
6+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
7+
# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
8+
# RUN: FileCheck %s --check-prefix=VEXT
9+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
10+
# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
11+
# RUN: FileCheck %s --check-prefix=VFRED --allow-empty
12+
13+
# Make sure only the supported SEWs are generated for fractional LMUL.
14+
# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK
15+
# FRAC-LMUL: SEW: e8
16+
# FRAC-LMUL: SEW: e16
17+
# FRAC-LMUL-NOT: SEW: e{{(32|64)}}
18+
19+
# Make sure only SEWs that are equal to the supported FLEN are generated
20+
# FP: PseudoVFADD_VFPR16_M1_E16
21+
# FP-NOT: SEW: e8
22+
# FP: PseudoVFADD_VV_M2_E16
23+
# FP-NOT: SEW: e8
24+
# FP: PseudoVFCLASS_V_MF2
25+
# FP-NOT: SEW: e8
26+
27+
# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
28+
# source operand.
29+
# VEXT: PseudoVSEXT_VF8_M2
30+
# VEXT-NOT: SEW: e8
31+
# VEXT-NOT: SEW: e16
32+
# VEXT-NOT: SEW: e32
33+
# VEXT: SEW: e64
34+
# VEXT: PseudoVZEXT_VF8_M2
35+
# VEXT-NOT: SEW: e8
36+
# VEXT-NOT: SEW: e16
37+
# VEXT-NOT: SEW: e32
38+
# VEXT: SEW: e64
39+
40+
# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
41+
# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
2+
# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
3+
4+
# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
5+
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
6+
# CHECK: AVL: VLMAX
7+
# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}

0 commit comments

Comments
 (0)