diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
index 8cf15fa26e22d..4b026001589ff 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -8,7 +8,236 @@ //===----------------------------------------------------------------------===//
-// FIXME: Implement sheduling model for V and other extensions.
+defvar Andes45VLEN = 512;
+defvar Andes45DLEN = 512;
+defvar Andes45VLEN_DLEN_RATIO = !div(Andes45VLEN, Andes45DLEN);
+
+assert !or(!eq(Andes45VLEN_DLEN_RATIO, 1), !eq(Andes45VLEN_DLEN_RATIO, 2)),
+       "Andes45VLEN / Andes45DLEN should be 1 or 2";
+
+defvar Andes45BIU_DATA_WIDTH = 512;
+defvar Andes45DLEN_BIU_DATA_WIDTH_RATIO = !div(Andes45DLEN, Andes45BIU_DATA_WIDTH);
+
+assert !or(!eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 1), !eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 2)),
+       "Andes45DLEN / Andes45BIU_DATA_WIDTH should be 1 or 2";
+
+// HVM region: VLSU_MEM_DW equals DLEN
+// Cacheable/Non-cacheable region: VLSU_MEM_DW equals BIU_DATA_WIDTH
+defvar Andes45VLSU_MEM_DW = Andes45BIU_DATA_WIDTH;
+defvar Andes45VLEN_VLSU_MEM_DW_RATIO = !div(Andes45VLEN, Andes45VLSU_MEM_DW);
+
+// The latency varies depending on the memory type and status.
+defvar VLSU_MEM_LATENCY = 13;
+
+// The worst case LMUL is the largest LMUL.
+class Andes45IsWorstCaseMX<string mx, list<string> MxList> {
+  defvar LLMUL = LargestLMUL<MxList>.r;
+  bit c = !eq(mx, LLMUL);
+}
+
+// The worst case is the largest LMUL with the smallest SEW.
+class Andes45IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
+                              bit isF = 0> {
+  defvar LLMUL = LargestLMUL<MxList>.r;
+  defvar SSEW = SmallestSEW<mx, isF>.r;
+  bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+}
+
+// When fractional LMUL is used, the LMUL used in calculation is 1.
+class Andes45GetLMULValue<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : 1,
+    !eq(mx, "M2") : 2,
+    !eq(mx, "M4") : 4,
+    !eq(mx, "M8") : 8,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+}
+
+// (VLEN/DLEN)*LMUL
+// When fractional LMUL is used, the LMUL used in calculation is 1.
+class Andes45GetCyclesDefault<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN_DLEN_RATIO, 1),
+    !eq(mx, "M2") : !mul(Andes45VLEN_DLEN_RATIO, 2),
+    !eq(mx, "M4") : !mul(Andes45VLEN_DLEN_RATIO, 4),
+    !eq(mx, "M8") : !mul(Andes45VLEN_DLEN_RATIO, 8),
+    !eq(mx, "MF2") : !mul(Andes45VLEN_DLEN_RATIO, 1),
+    !eq(mx, "MF4") : !mul(Andes45VLEN_DLEN_RATIO, 1),
+    !eq(mx, "MF8") : !mul(Andes45VLEN_DLEN_RATIO, 1)
+  );
+}
+
+// (VLEN/DLEN)*LMUL*2, if LMUL >= 1,
+// (VLEN != DLEN) ? 4 : 1, if LMUL < 1.
+class Andes45GetCyclesWidening<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN_DLEN_RATIO, 2),
+    !eq(mx, "M2") : !mul(Andes45VLEN_DLEN_RATIO, 4),
+    !eq(mx, "M4") : !mul(Andes45VLEN_DLEN_RATIO, 8),
+    // FIXME: .v* and .w* are different if LMUL < 1.
+    !eq(mx, "MF2") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1),
+    !eq(mx, "MF4") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1),
+    !eq(mx, "MF8") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1)
+  );
+}
+
+// (VLEN/DLEN)*LMUL*2, if LMUL >= 1,
+// (VLEN != DLEN) ? 4 : 1, if LMUL < 1.
+class Andes45GetCyclesNarrowing<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN_DLEN_RATIO, 2),
+    !eq(mx, "M2") : !mul(Andes45VLEN_DLEN_RATIO, 4),
+    !eq(mx, "M4") : !mul(Andes45VLEN_DLEN_RATIO, 8),
+    !eq(mx, "MF2") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1),
+    !eq(mx, "MF4") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1),
+    !eq(mx, "MF8") : !if(!ne(Andes45VLEN, Andes45DLEN), 4, 1)
+  );
+}
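+
+// For example, with the VLEN = DLEN = 512 configuration above (ratio 1),
+// Andes45GetCyclesDefault<"M4">.c evaluates to 4 occupancy cycles and
+// Andes45GetCyclesWidening<"M4">.c to 8.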
+
+// 3, if LMUL >= 1,
+// (VLEN != DLEN) ? 3 : 2, if LMUL < 1.
+class Andes45GetLatencyNarrowing<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : 3,
+    !eq(mx, "M2") : 3,
+    !eq(mx, "M4") : 3,
+    !eq(mx, "MF2") : !if(!ne(Andes45VLEN, Andes45DLEN), 3, 2),
+    !eq(mx, "MF4") : !if(!ne(Andes45VLEN, Andes45DLEN), 3, 2),
+    !eq(mx, "MF8") : !if(!ne(Andes45VLEN, Andes45DLEN), 3, 2)
+  );
+}
+
+// (VLEN/VLSU_MEM_DW)*EMUL
+class Andes45GetCyclesLoadStore<string mx> {
+  int c = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+    !eq(mx, "M2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 2),
+    !eq(mx, "M4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 4),
+    !eq(mx, "M8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 8),
+    !eq(mx, "MF2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+    !eq(mx, "MF4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+    !eq(mx, "MF8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1)
+  );
+}
+
+class Andes45GetCyclesOnePerElement<string mx, int sew> {
+  defvar VL = !div(Andes45VLEN, sew);
+  int c = !cond(
+    !eq(mx, "M1") : VL,
+    !eq(mx, "M2") : !mul(VL, 2),
+    !eq(mx, "M4") : !mul(VL, 4),
+    !eq(mx, "M8") : !mul(VL, 8),
+    !eq(mx, "MF2") : !div(VL, 2),
+    !eq(mx, "MF4") : !div(VL, 4),
+    !eq(mx, "MF8") : !div(VL, 8)
+  );
+}
+
+class Andes45GetLatencyDiv<int sew> {
+  int c = !cond(
+    !eq(sew, 8) : 12,
+    !eq(sew, 16) : 20,
+    !eq(sew, 32) : 36,
+    !eq(sew, 64) : 68
+  );
+}
+
+// (VLEN/DLEN)*LMUL*SEW+(VLEN/DLEN)*LMUL*2+1
+// = (VLEN/DLEN)*LMUL*(SEW+2)+1
+class Andes45GetCyclesDiv<string mx, int sew> {
+  defvar a = !mul(Andes45VLEN_DLEN_RATIO, !add(sew, 2));
+  int b = !cond(
+    !eq(mx, "M1") : !mul(a, 1),
+    !eq(mx, "M2") : !mul(a, 2),
+    !eq(mx, "M4") : !mul(a, 4),
+    !eq(mx, "M8") : !mul(a, 8),
+    !eq(mx, "MF2") : !mul(a, 1),
+    !eq(mx, "MF4") : !mul(a, 1),
+    !eq(mx, "MF8") : !mul(a, 1)
+  );
+
+  int c = !add(b, 1);
+}
+
+class Andes45GetFDivFactor<int sew> {
+  int c = !cond(
+    !eq(sew, 16) : 22,
+    !eq(sew, 32) : 36,
+    !eq(sew, 64) : 64
+  );
+}
+
+class Andes45GetFSqrtFactor<int sew> {
+  int c = !cond(
+    !eq(sew, 16) : 20,
+    !eq(sew, 32) : 34,
+    !eq(sew, 64) : 62
+  );
+}
+
+// (VLEN/DLEN)*LMUL+LOG2(DLEN/64)*2+LOG2(64/SEW)
+class Andes45GetReductionCycles<string mx, int sew> {
+  defvar d = Andes45GetCyclesDefault<mx>.c;
+  int c = !add(d,
+               !add(!mul(!logtwo(!div(Andes45DLEN, 64)), 2),
+                    !logtwo(!div(64, sew))));
+}
+
+// (VLEN/DLEN)*LMUL*2+LOG2(DLEN/64)*2+LOG2(64/2/SEW)
+class Andes45GetReductionCyclesWidening<string mx, int sew> {
+  defvar w = !mul(Andes45GetCyclesDefault<mx>.c, 2);
+  int c = !add(w,
+               !add(!mul(!logtwo(!div(Andes45DLEN, 64)), 2),
+                    !logtwo(!div(64, sew))));
+}
+
+// (VLEN/DLEN)*LMUL+LOG2(DLEN/SEW)
+class Andes45GetFReductionCycles<string mx, int sew> {
+  defvar d = Andes45GetCyclesDefault<mx>.c;
+  int c = !add(d, !logtwo(!div(Andes45DLEN, sew)));
+}
+
+// (VLEN/DLEN)*LMUL*2+LOG2(DLEN/SEW)-1
+class Andes45GetFWReductionCycles<string mx, int sew> {
+  defvar a = !mul(Andes45GetCyclesDefault<mx>.c, 2);
+  defvar b = !add(a, !logtwo(!div(Andes45DLEN, sew)));
+  int c = !sub(b, 1);
+}
+
+// (VLEN*LMUL)/SEW
+class Andes45GetOrderedFReductionCycles<string mx, int sew> {
+  defvar b = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN, 1),
+    !eq(mx, "M2") : !mul(Andes45VLEN, 2),
+    !eq(mx, "M4") : !mul(Andes45VLEN, 4),
+    !eq(mx, "M8") : !mul(Andes45VLEN, 8),
+    !eq(mx, "MF2") : !mul(Andes45VLEN, 1),
+    !eq(mx, "MF4") : !mul(Andes45VLEN, 1),
+    !eq(mx, "MF8") : !mul(Andes45VLEN, 1)
+  );
+
+  int c = !div(b, sew);
+}
+
+// (VLEN*LMUL)/SEW
+class Andes45GetOrderedFWReductionCycles<string mx, int sew> {
+  defvar b = !cond(
+    !eq(mx, "M1") : !mul(Andes45VLEN, 1),
+    !eq(mx, "M2") : !mul(Andes45VLEN, 2),
+    !eq(mx, "M4") : !mul(Andes45VLEN, 4),
+    !eq(mx, "M8") : !mul(Andes45VLEN, 8),
+    !eq(mx, "MF2") : !mul(Andes45VLEN, 1),
+    !eq(mx, "MF4") : !mul(Andes45VLEN, 1),
+    !eq(mx, "MF8") : !mul(Andes45VLEN, 1)
+  );
+
+  int c = !div(b, sew);
+}
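+
+// For example, with VLEN = DLEN = 512, Andes45GetReductionCycles<"M1", 32>.c
+// is 1 + LOG2(512/64)*2 + LOG2(64/32) = 1 + 6 + 1 = 8 occupancy cycles.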
+
 def Andes45Model : SchedMachineModel {
   let MicroOpBufferSize = 0; // Andes45 is in-order processor
   let IssueWidth = 2;        // 2 micro-ops dispatched per cycle
@@ -32,6 +261,15 @@ let SchedModel = Andes45Model in {
 // - Floating Point Divide / SQRT Unit (FDIV)
 // - Floating Point Move Unit (FMV)
 // - Floating Point Misc Unit (FMISC)
+//
+// Andes 45 series VPU
+// - Vector Arithmetic and Logical Unit (VALU)
+// - Vector Multiply Accumulate Unit (VMAC)
+// - Vector Divide Unit (VDIV)
+// - Vector Permutation Unit (VPERMUT)
+// - Vector Mask Unit (VMASK)
+// - Vector Floating-Point Miscellaneous Unit (VFMIS)
+// - Vector Floating-Point Divide Unit (VFDIV)
 //===----------------------------------------------------------------------===//
 let BufferSize = 0 in {
@@ -44,6 +282,24 @@ def Andes45FMAC : ProcResource<1>;
 def Andes45FDIV : ProcResource<1>;
 def Andes45FMV : ProcResource<1>;
 def Andes45FMISC : ProcResource<1>;
+
+def Andes45VALU : ProcResource<1>;
+def Andes45VMAC : ProcResource<1>;
+def Andes45VFMIS : ProcResource<1>;
+def Andes45VPERMUT : ProcResource<1>;
+def Andes45VDIV : ProcResource<1>;
+def Andes45VFDIV : ProcResource<1>;
+def Andes45VMASK : ProcResource<1>;
+def Andes45VLSU : ProcResource<1>;
+
+def Andes45VPU : ProcResGroup<[Andes45VALU,
+                               Andes45VMAC,
+                               Andes45VFMIS,
+                               Andes45VPERMUT,
+                               Andes45VDIV,
+                               Andes45VFDIV,
+                               Andes45VMASK,
+                               Andes45VLSU]>;
 }
 
 // Integer arithmetic and logic
@@ -333,10 +589,838 @@ def : ReadAdvance;
 def : ReadAdvance;
 def : ReadAdvance;
 
+// RVV Scheduling
+
+// 6. Configuration-Setting Instructions
+def : WriteRes<WriteVSETVLI, [Andes45VALU]>;
+def : WriteRes<WriteVSETIVLI, [Andes45VALU]>;
+def : WriteRes<WriteVSETVL, [Andes45VALU]>;
+
+// 7. Vector Loads and Stores
+
+// Unit-stride loads and stores.
+
+// The latency for loads is (4+VLSU_MEM_LATENCY).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL.
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDM", [Andes45VLSU], mx, IsWorstCase>;
+  }
+  let ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVSTE", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSTM", [Andes45VLSU], mx, IsWorstCase>;
+  }
+}
+
+// Strided loads and stores.
+
+// Strided loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (4+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
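+// For example, with DLEN = 512 and VLSU_MEM_LATENCY = 13, an EEW = 32 strided
+// load has latency 4 + 13 + 512/32 = 33 cycles, and an M1 access occupies the
+// VLSU for VLEN/32 = 16 cycles.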
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 8))),
+      ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVLDS8", [Andes45VLSU], mx, IsWorstCase>;
+  let ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVSTS8", [Andes45VLSU], mx, IsWorstCase>;
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 16>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 16))),
+      ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVLDS16", [Andes45VLSU], mx, IsWorstCase>;
+  let ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVSTS16", [Andes45VLSU], mx, IsWorstCase>;
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 32>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 32))),
+      ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVLDS32", [Andes45VLSU], mx, IsWorstCase>;
+  let ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVSTS32", [Andes45VLSU], mx, IsWorstCase>;
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 64>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 64))),
+      ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVLDS64", [Andes45VLSU], mx, IsWorstCase>;
+  let ReleaseAtCycles = [Cycles] in
+    defm "" : LMULWriteResMX<"WriteVSTS64", [Andes45VLSU], mx, IsWorstCase>;
+}
+
+// Indexed loads and stores
+
+// Indexed loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
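+// For example, an EEW = 8 indexed load at M2 (VLEN = DLEN = 512) has latency
+// 5 + 13 + 512/8 = 82 cycles and occupies the VLSU for (512/8)*2 + (2-1) = 129
+// cycles.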
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 8))),
+      ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVLDUX8", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDOX8", [Andes45VLSU], mx, IsWorstCase>;
+  }
+  let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVSTUX8", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSTOX8", [Andes45VLSU], mx, IsWorstCase>;
+  }
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 16>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 16))),
+      ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVLDUX16", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDOX16", [Andes45VLSU], mx, IsWorstCase>;
+  }
+  let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVSTUX16", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSTOX16", [Andes45VLSU], mx, IsWorstCase>;
+  }
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 32>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 32))),
+      ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVLDUX32", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDOX32", [Andes45VLSU], mx, IsWorstCase>;
+  }
+  let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVSTUX32", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSTOX32", [Andes45VLSU], mx, IsWorstCase>;
+  }
+}
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesOnePerElement<mx, 64>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, 64))),
+      ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVLDUX64", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVLDOX64", [Andes45VLSU], mx, IsWorstCase>;
+  }
+  let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+    defm "" : LMULWriteResMX<"WriteVSTUX64", [Andes45VLSU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSTOX64", [Andes45VLSU], mx, IsWorstCase>;
+  }
+}
+
+// TODO: Confirm the throughput and latency for whole-register loads/stores.
+// VLD*R is LMUL aware
+let Latency = 6, ReleaseAtCycles = [2] in
+  def : WriteRes<WriteVLD1R, [Andes45VLSU]>;
+let Latency = 6, ReleaseAtCycles = [4] in
+  def : WriteRes<WriteVLD2R, [Andes45VLSU]>;
+let Latency = 6, ReleaseAtCycles = [8] in
+  def : WriteRes<WriteVLD4R, [Andes45VLSU]>;
+let Latency = 6, ReleaseAtCycles = [16] in
+  def : WriteRes<WriteVLD8R, [Andes45VLSU]>;
+// VST*R is LMUL aware
+let ReleaseAtCycles = [2] in
+  def : WriteRes<WriteVST1R, [Andes45VLSU]>;
+let ReleaseAtCycles = [4] in
+  def : WriteRes<WriteVST2R, [Andes45VLSU]>;
+let ReleaseAtCycles = [8] in
+  def : WriteRes<WriteVST4R, [Andes45VLSU]>;
+let ReleaseAtCycles = [16] in
+  def : WriteRes<WriteVST8R, [Andes45VLSU]>;
+
+// Unit-Stride Segmented Loads and Stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY+EMUL*NFIELDS+2).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL*NFIELDS.
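+// For example, a 4-field unit-stride segment load at M1 (EMUL = 1) has latency
+// 4 + 13 + (1*4 + 2) = 23 cycles and occupies the VLSU for 1*4 = 4 cycles.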
+foreach mx = SchedMxList in {
+  foreach eew = [8, 16, 32, 64] in {
+    defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+    foreach nf=2-8 in {
+      defvar Size = !mul(Andes45GetLMULValue<mx>.c, nf);
+      let Latency = !add(4, !add(VLSU_MEM_LATENCY, !add(Size, 2))),
+          ReleaseAtCycles = [!mul(Cycles, nf)] in {
+        defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+        defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+      }
+      // TODO
+      let ReleaseAtCycles = [Cycles] in
+        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+    }
+  }
+}
+
+// Strided Segmented Loads and Stores
+
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
+foreach mx = SchedMxList in {
+  foreach nf=2-8 in {
+    foreach eew = [8, 16, 32, 64] in {
+      defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
+      defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+      let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+          ReleaseAtCycles = [Cycles] in
+        defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+      let ReleaseAtCycles = [Cycles] in
+        defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+    }
+  }
+}
+
+// Indexed Segmented Loads and Stores
+
+// The latency for loads is (6+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
+foreach mx = SchedMxList in {
+  foreach nf=2-8 in {
+    foreach eew = [8, 16, 32, 64] in {
+      defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
+      defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+      let Latency = !add(6, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+          ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+        defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+        defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+      }
+      let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+        defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+        defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew,
+                                 [Andes45VLSU], mx, IsWorstCase>;
+      }
+    }
+  }
+}
+
+// 11. Vector Integer Arithmetic Instructions
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 2, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVIALUV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIALUX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIALUI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVShiftV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVShiftX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVShiftI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMinMaxV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMinMaxX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMergeV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMergeX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMergeI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMovV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMovX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMovI", [Andes45VALU], mx, IsWorstCase>;
+  }
+  // Mask results can't chain.
+  let Latency = !add(Cycles, 2),
+      ReleaseAtCycles = [!add(Cycles, !ne(Andes45VLEN, Andes45DLEN))] in {
+    defm "" : LMULWriteResMX<"WriteVICmpV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICmpX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICmpI", [Andes45VALU], mx, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 4, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVIMulV", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMulX", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMulAddV", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIMulAddX", [Andes45VMAC], mx, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxList in {
+  foreach sew = SchedSEWSet<mx>.val in {
+    defvar Latency = Andes45GetLatencyDiv<sew>.c;
+    defvar Cycles = Andes45GetCyclesDiv<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+    let Latency = Latency, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [Andes45VDIV], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [Andes45VDIV], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
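+// For example, with SEW = 32 at M1 (VLEN = DLEN), vdiv.vv occupies VDIV for
+// 1*(32+2) + 1 = 35 cycles, with latency 36.
+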
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 2, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVExtV", [Andes45VPERMUT], mx, IsWorstCase>;
+  }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+  defvar Cycles = Andes45GetCyclesWidening<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxListW>.c;
+  let Latency = 2, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVIWALUV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIWALUX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIWALUI", [Andes45VALU], mx, IsWorstCase>;
+  }
+  let Latency = 4, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVIWMulV", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIWMulX", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [Andes45VMAC], mx, IsWorstCase>;
+  }
+}
+
+// Narrowing
+foreach mx = SchedMxListW in {
+  defvar Latency = Andes45GetLatencyNarrowing<mx>.c;
+  defvar Cycles = Andes45GetCyclesNarrowing<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxListW>.c;
+  let Latency = Latency, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVNShiftV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVNShiftX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVNShiftI", [Andes45VALU], mx, IsWorstCase>;
+  }
+}
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 2, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVSALUV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSALUX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSALUI", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVAALUV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVAALUX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSShiftV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSShiftX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSShiftI", [Andes45VALU], mx, IsWorstCase>;
+  }
+  let Latency = 4, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVSMulV", [Andes45VMAC], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSMulX", [Andes45VMAC], mx, IsWorstCase>;
+  }
+}
+
+// Narrowing
+foreach mx = SchedMxListW in {
+  defvar Latency = Andes45GetLatencyNarrowing<mx>.c;
+  defvar Cycles = Andes45GetCyclesNarrowing<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxListW>.c;
+  let Latency = Latency, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVNClipV", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVNClipX", [Andes45VALU], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVNClipI", [Andes45VALU], mx, IsWorstCase>;
+  }
+}
+
+// 13. Vector Floating-Point Instructions
+foreach mx = SchedMxListF in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    // The latency for a CPU configured as 'FP32' is 5; for 'FP32+FP64' it is 6.
+    // In most cases, the CPU is configured as "FP32+FP64".
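+    // For example, the Latency = 6 group below occupies VMAC for
+    // Andes45GetCyclesDefault<"M2">.c = 2 cycles at M2, independent of SEW.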
+    let Latency = 6, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [Andes45VMAC], mx, sew, IsWorstCase>;
+    }
+    let Latency = 3, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+    }
+    let Latency = 2, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [Andes45VFMIS], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [Andes45VFMIS], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 3, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [Andes45VFMIS], mx, IsWorstCase>;
+  }
+  let Latency = 2, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVFClassV", [Andes45VFMIS], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVFMergeV", [Andes45VFMIS], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVFMovV", [Andes45VFMIS], mx, IsWorstCase>;
+  }
+  // Mask results can't chain.
+  let Latency = !add(Cycles, 2),
+      ReleaseAtCycles = [!add(Cycles, !ne(Andes45VLEN, Andes45DLEN))] in {
+    defm "" : LMULWriteResMX<"WriteVFCmpV", [Andes45VFMIS], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVFCmpF", [Andes45VFMIS], mx, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListF in {
+  defvar Cycles = !mul(Andes45GetCyclesDefault<mx>.c, 6);
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    let Latency = 7, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [Andes45VFDIV], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar Factor = Andes45GetFDivFactor<sew>.c;
+    defvar Cycles = !mul(Andes45GetCyclesDefault<mx>.c, Factor);
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    let Latency = !add(2, Factor), ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [Andes45VFDIV], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [Andes45VFDIV], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar Factor = Andes45GetFSqrtFactor<sew>.c;
+    defvar Cycles = !mul(Andes45GetCyclesDefault<mx>.c, Factor);
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    let Latency = !add(2, Factor), ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [Andes45VFDIV], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+  defvar Cycles = Andes45GetCyclesWidening<mx>.c;
+  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
+    let Latency = 3, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+    }
+  }
+}
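+// For example, with VLEN = DLEN, vfwcvt.f.x.v at M1 occupies VFMIS for
+// Andes45GetCyclesWidening<"M1">.c = 2 cycles, with latency 3.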
+foreach mx = SchedMxListFW in {
+  defvar Cycles = Andes45GetCyclesWidening<mx>.c;
+  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListFW, 1>.c;
+    // The latency for a CPU configured as 'FP32' is 5; for 'FP32+FP64' it is 6.
+    // In most cases, the CPU is configured as "FP32+FP64".
+    let Latency = 6, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [Andes45VMAC], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [Andes45VMAC], mx, sew, IsWorstCase>;
+    }
+    let Latency = 3, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+    }
+  }
+
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxListFW>.c;
+  let Latency = 3, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [Andes45VFMIS], mx, IsWorstCase>;
+  }
+}
+
+// Narrowing
+foreach mx = SchedMxListW in {
+  defvar Latency = !add(Andes45GetLatencyNarrowing<mx>.c, 1);
+  defvar Cycles = Andes45GetCyclesNarrowing<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxListW>.c;
+  let Latency = Latency, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [Andes45VFMIS], mx, IsWorstCase>;
+  }
+}
+foreach mx = SchedMxListFW in {
+  defvar Cycles = Andes45GetCyclesNarrowing<mx>.c;
+  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+    defvar Latency = !add(Andes45GetLatencyNarrowing<mx>.c, 1);
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListFW, 1>.c;
+    let Latency = Latency, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [Andes45VFMIS], mx, sew, IsWorstCase>;
+    }
+  }
+}
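+
+// For example, with VLEN = DLEN, vfncvt.f.f.w at M1 has latency
+// Andes45GetLatencyNarrowing<"M1">.c + 1 = 4 cycles.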
+
+// 14. Vector Reduction Operations
+foreach mx = SchedMxList in {
+  foreach sew = SchedSEWSet<mx>.val in {
+    defvar Cycles = Andes45GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+    let Latency = !add(Cycles, 1),
+        ReleaseAtCycles = [!add(Cycles, !ne(Andes45VLEN, Andes45DLEN))] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [Andes45VALU],
+                                     mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [Andes45VALU],
+                                     mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+foreach mx = SchedMxListWRed in {
+  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
+    defvar Cycles = Andes45GetReductionCyclesWidening<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
+    let Latency = !add(Cycles, 1),
+        ReleaseAtCycles = [!add(Cycles, !ne(Andes45VLEN, Andes45DLEN))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [Andes45VALU],
+                                     mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar Cycles = Andes45GetFReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    // 4*vfredusum-micro-ops+2
+    let Latency = !add(!mul(4, Cycles), 2),
+        // 1+4*(vfredusum-micro-ops-1)+(VLEN!=DLEN)
+        ReleaseAtCycles = [!add(1, !add(!mul(4, !sub(Cycles, 1)), !ne(Andes45VLEN, Andes45DLEN)))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [Andes45VMAC],
+                                     mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar Cycles = Andes45GetOrderedFReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    // 4*vfredosum-micro-ops+2
+    let Latency = !add(!mul(4, Cycles), 2),
+        // 1+4*(vfredosum-micro-ops-1)+(VLEN!=DLEN)
+        ReleaseAtCycles = [!add(1, !add(!mul(4, !sub(Cycles, 1)), !ne(Andes45VLEN, Andes45DLEN)))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [Andes45VMAC],
+                                     mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSet<mx, 1>.val in {
+    defvar Cycles = Andes45GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+    let Latency = !add(Cycles, 1),
+        ReleaseAtCycles = [!add(Cycles, !ne(Andes45VLEN, Andes45DLEN))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [Andes45VFMIS],
+                                     mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListFWRed in {
+  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+    defvar Cycles = Andes45GetFWReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
+    // 4*vfwredusum-micro-ops+2
+    let Latency = !add(!mul(4, Cycles), 2),
+        // 1+4*(vfwredusum-micro-ops-1)+(VLEN!=DLEN)
+        ReleaseAtCycles = [!add(1, !add(!mul(4, !sub(Cycles, 1)), !ne(Andes45VLEN, Andes45DLEN)))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [Andes45VMAC],
+                                     mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListFWRed in {
+  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+    defvar Cycles = Andes45GetOrderedFWReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
+    // 4*vfwredosum-micro-ops+2
+    let Latency = !add(!mul(4, Cycles), 2),
+        // 1+4*(vfwredosum-micro-ops-1)+(VLEN != DLEN)
+        ReleaseAtCycles = [!add(1, !add(!mul(4, !sub(Cycles, 1)), !ne(Andes45VLEN, Andes45DLEN)))] in
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [Andes45VMAC],
+                                     mx, sew, IsWorstCase>;
+  }
+}
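+
+// For example, vfredosum.vs with SEW = 32 at M1 (VLEN = 512) is modeled as
+// 512/32 = 16 micro-ops, giving latency 4*16 + 2 = 66 cycles and an occupancy
+// of 1 + 4*15 = 61 cycles when VLEN = DLEN.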
+
+// 15. Vector Mask Instructions
+foreach mx = SchedMxList in {
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 3, ReleaseAtCycles = [Andes45VLEN_DLEN_RATIO] in {
+    defm "" : LMULWriteResMX<"WriteVMALUV", [Andes45VMASK], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVMSFSV", [Andes45VMASK], mx, IsWorstCase>;
+  }
+  let Latency = !add(3, !ne(Andes45VLEN, Andes45DLEN)),
+      ReleaseAtCycles = [Andes45VLEN_DLEN_RATIO] in {
+    defm "" : LMULWriteResMX<"WriteVMPopV", [Andes45VMASK], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVMFFSV", [Andes45VMASK], mx, IsWorstCase>;
+  }
+}
+// TODO: viota and vid have different latency and throughput if VLEN/DLEN=2.
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = 4, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVIotaV", [Andes45VMASK], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVIdxV", [Andes45VMASK], mx, IsWorstCase>;
+  }
+}
+
+// 16. Vector Permutation Instructions
+let Latency = 2, ReleaseAtCycles = [Andes45VLEN_DLEN_RATIO] in {
+  def : WriteRes<WriteVMovXS, [Andes45VPERMUT]>;
+  def : WriteRes<WriteVMovSX, [Andes45VPERMUT]>;
+}
+let Latency = 3, ReleaseAtCycles = [Andes45VLEN_DLEN_RATIO] in {
+  def : WriteRes<WriteVMovFS, [Andes45VPERMUT]>;
+  def : WriteRes<WriteVMovSF, [Andes45VPERMUT]>;
+}
+
+// TODO:
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = Cycles, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVRGatherVX", [Andes45VPERMUT], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVRGatherVI", [Andes45VPERMUT], mx, IsWorstCase>;
+  }
+}
+
+// TODO:
+foreach mx = SchedMxList in {
+  foreach sew = SchedSEWSet<mx>.val in {
+    defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+    defvar IsWorstCase = Andes45IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+    let Latency = Cycles, ReleaseAtCycles = [Cycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [Andes45VPERMUT], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [Andes45VPERMUT], mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [Andes45VPERMUT], mx, sew, IsWorstCase>;
+    }
+  }
+}
+
+// TODO:
+foreach mx = SchedMxList in {
+  defvar Cycles = Andes45GetCyclesDefault<mx>.c;
+  defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+  let Latency = Cycles, ReleaseAtCycles = [Cycles] in {
+    defm "" : LMULWriteResMX<"WriteVSlideUpX", [Andes45VPERMUT], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSlideDownX", [Andes45VPERMUT], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVSlideI", [Andes45VPERMUT], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVISlide1X", [Andes45VPERMUT], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVFSlide1F", [Andes45VPERMUT], mx, IsWorstCase>;
+  }
+}
+
+// VMov*V is LMUL Aware
+let Latency = 2, ReleaseAtCycles = [!mul(Andes45VLEN_DLEN_RATIO, 1)] in
+  def : WriteRes<WriteVMov1V, [Andes45VPERMUT]>;
+let Latency = 2, ReleaseAtCycles = [!mul(Andes45VLEN_DLEN_RATIO, 2)] in
+  def : WriteRes<WriteVMov2V, [Andes45VPERMUT]>;
+let Latency = 2, ReleaseAtCycles = [!mul(Andes45VLEN_DLEN_RATIO, 4)] in
+  def : WriteRes<WriteVMov4V, [Andes45VPERMUT]>;
+let Latency = 2, ReleaseAtCycles = [!mul(Andes45VLEN_DLEN_RATIO, 8)] in
+  def : WriteRes<WriteVMov8V, [Andes45VPERMUT]>;
+
+// Others
+def : WriteRes<WriteRdVLENB, [Andes45VALU]>;
+
+// 6. Configuration-Setting Instructions
+def : ReadAdvance<ReadVSETVLI, 0>;
+def : ReadAdvance<ReadVSETVL, 0>;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTM", 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
+defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// 11. Vector Integer Arithmetic Instructions
+defm : LMULReadAdvance<"ReadVIALUV", 0>;
+defm : LMULReadAdvance<"ReadVIALUX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
+defm : LMULReadAdvance<"ReadVExtV", 0>;
+defm : LMULReadAdvance<"ReadVICALUV", 0>;
+defm : LMULReadAdvance<"ReadVICALUX", 0>;
+defm : LMULReadAdvance<"ReadVShiftV", 0>;
+defm : LMULReadAdvance<"ReadVShiftX", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
+defm : LMULReadAdvance<"ReadVICmpV", 0>;
+defm : LMULReadAdvance<"ReadVICmpX", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
+defm : LMULReadAdvance<"ReadVIMulV", 0>;
+defm : LMULReadAdvance<"ReadVIMulX", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
+defm : LMULReadAdvance<"ReadVIMergeV", 0>;
+defm : LMULReadAdvance<"ReadVIMergeX", 0>;
+defm : LMULReadAdvance<"ReadVIMovV", 0>;
+defm : LMULReadAdvance<"ReadVIMovX", 0>;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
+
+// 13. Vector Floating-Point Instructions
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
+defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+
+// 14. Vector Reduction Operations
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// 15. Vector Mask Instructions
+defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
+defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVIotaV", 0>;
+
+// 16. Vector Permutation Instructions
+def : ReadAdvance<ReadVMovXS, 0>;
+def : ReadAdvance<ReadVMovSX_V, 0>;
+def : ReadAdvance<ReadVMovSX_X, 0>;
+def : ReadAdvance<ReadVMovFS, 0>;
+def : ReadAdvance<ReadVMovSF_V, 0>;
+def : ReadAdvance<ReadVMovSF_F, 0>;
+defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
+def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
+foreach mx = SchedMxList in {
+  def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
+  foreach sew = SchedSEWSet<mx>.val in
+    def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), 0>;
+}
+
 //===----------------------------------------------------------------------===//
 // Unsupported extensions
 defm : UnsupportedSchedQ;
-defm : UnsupportedSchedV;
 defm : UnsupportedSchedZabha;
 defm : UnsupportedSchedZbkb;
 defm : UnsupportedSchedZbkx;
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
index d1ab4b3b6a7e0..1a7812e70438d 100644
--- a/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s
@@ -67,26 +67,34 @@ fcvt.s.w ft0, a0
 # CHECK-NEXT: [5] - Andes45FMV
 # CHECK-NEXT: [6] - Andes45LSU
 # CHECK-NEXT: [7] - Andes45MDU
+# CHECK-NEXT: [8] - Andes45VALU
+# CHECK-NEXT: [9] - Andes45VDIV
+# CHECK-NEXT: [10] - Andes45VFDIV
+# CHECK-NEXT: [11] - Andes45VFMIS
+# CHECK-NEXT: [12] - Andes45VLSU
+# CHECK-NEXT: [13] - Andes45VMAC
+# CHECK-NEXT: [14] - Andes45VMASK
+# CHECK-NEXT: [15] - Andes45VPERMUT
 # CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 56.00 4.00 4.00 2.00 - -
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - 56.00 4.00 4.00 2.00 - - - - - - - - - -
 # CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions:
-# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
-# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
-# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1
-# CHECK-NEXT: - - - - 1.00 - - - - fmul.s ft0, fa0, fa1
-# CHECK-NEXT: - - - - 1.00 - - - - fmadd.s ft0, fa0, fa1, fa2
-# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1
-# CHECK-NEXT: - - - 18.00 - - - - - fsqrt.s ft0, fa0
-# CHECK-NEXT: - - - - - - 1.00 - - fsgnj.s ft0, fa0, fa1
-# CHECK-NEXT: - - - - - - 1.00 - - fmv.x.w a0, fa0
-# CHECK-NEXT: - - - - - 1.00 - - - fmin.s ft0, fa0, fa1
-# CHECK-NEXT: - - - - - 1.00 - - - fclass.s a0, fa0
-# CHECK-NEXT: - - - - - 1.00 - - - feq.s a0, fa0, fa1
-# CHECK-NEXT: - - - - - 1.00 - - - fcvt.s.w ft0, a0
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - 19.00 - - - - - - - - - - - - - fdiv.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - fadd.s ft0, fa0, fa1
+# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - fmul.s
ft0, fa0, fa1 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - fmadd.s ft0, fa0, fa1, fa2 +# CHECK-NEXT: - - - 19.00 - - - - - - - - - - - - - fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: - - - 18.00 - - - - - - - - - - - - - fsqrt.s ft0, fa0 +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - fsgnj.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - fmv.x.w a0, fa0 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - - - - - fclass.s a0, fa0 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - - - - - feq.s a0, fa0, fa1 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - - - - - fcvt.s.w ft0, a0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s index d90dce8c5c3fc..3227ecfa4a372 100644 --- a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s @@ -133,55 +133,63 @@ bext a0, a0, a0 # CHECK-NEXT: [5] - Andes45FMV # CHECK-NEXT: [6] - Andes45LSU # CHECK-NEXT: [7] - Andes45MDU +# CHECK-NEXT: [8] - Andes45VALU +# CHECK-NEXT: [9] - Andes45VDIV +# CHECK-NEXT: [10] - Andes45VFDIV +# CHECK-NEXT: [11] - Andes45VFMIS +# CHECK-NEXT: [12] - Andes45VLSU +# CHECK-NEXT: [13] - Andes45VMAC +# CHECK-NEXT: [14] - Andes45VMASK +# CHECK-NEXT: [15] - Andes45VPERMUT # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: 10.00 11.00 1.00 - - - - 16.00 80.00 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] +# CHECK-NEXT: 10.00 11.00 1.00 - - - - 16.00 80.00 - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - 1.00 - - - - - - - add a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - sub a1, a1, a1 -# CHECK-NEXT: - 1.00 - - - - - - - addw a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - subw a0, a0, a0 -# CHECK-NEXT: - 1.00 - - - - - - - slli a0, a0, 4 -# CHECK-NEXT: 1.00 - - - - - - - - slliw a0, a0, 4 -# CHECK-NEXT: - 1.00 - - - - - - - srl a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - srlw a0, a0, a0 -# CHECK-NEXT: - - - - - - - - 1.00 mul a0, a0, a0 -# CHECK-NEXT: - - - - - - - - 1.00 mulw a0, a0, a0 -# CHECK-NEXT: - - - - - - - - 39.00 div a0, a0, a0 -# CHECK-NEXT: - - - - - - - - 39.00 divw a0, a0, a0 -# CHECK-NEXT: - - - - - - - 1.00 - lb a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - lh a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - lw a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - ld a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - flw fa0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - fld fa0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - sb a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - sh a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - sw a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - sd a0, 4(a1) -# CHECK-NEXT: - - - - - - - 1.00 - amoswap.w a0, a1, (a0) -# CHECK-NEXT: - - - - - - - 1.00 - amoswap.d a0, a1, (a0) -# CHECK-NEXT: - - - - - - - 1.00 - lr.w a0, (a0) -# CHECK-NEXT: - - - - - - - 1.00 - lr.d a0, (a0) -# CHECK-NEXT: - - - - - - - 1.00 - sc.w a0, a1, (a0) -# CHECK-NEXT: - - - - - - - 1.00 - sc.d a0, a1, (a0) -# CHECK-NEXT: - - 1.00 - - - - - - csrrw a0, mstatus, zero -# CHECK-NEXT: - 1.00 - - - - - - - sh1add a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - sh1add.uw a0, a0, a0 -# CHECK-NEXT: - 1.00 - - - - - - - rori a0, a0, 4 -# CHECK-NEXT: 1.00 - - - - - - - - roriw a0, a0, 4 -# CHECK-NEXT: - 1.00 - - - - 
- - - rol a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - rolw a0, a0, a0 -# CHECK-NEXT: - 1.00 - - - - - - - clz a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - clzw a0, a0 -# CHECK-NEXT: - 1.00 - - - - - - - clmul a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - bclri a0, a0, 4 -# CHECK-NEXT: - 1.00 - - - - - - - bclr a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - - bexti a0, a0, 4 -# CHECK-NEXT: - 1.00 - - - - - - - bext a0, a0, a0 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - add a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sub a1, a1, a1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - addw a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - subw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - slli a0, a0, 4 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - slliw a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - srl a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - srlw a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - mul a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - mulw a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 39.00 - - - - - - - - div a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 39.00 - - - - - - - - divw a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lh a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lw a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - ld a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - flw fa0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - fld fa0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sh a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sw a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sd a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - amoswap.w a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - amoswap.d a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lr.w a0, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - lr.d a0, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sc.w a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - - - - sc.d a0, a1, (a0) +# CHECK-NEXT: - - 1.00 - - - - - - - - - - - - - - csrrw a0, mstatus, zero +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - sh1add a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - rori a0, a0, 4 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - roriw a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - rol a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - rolw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - clz a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - clzw a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - clmul a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - bclri a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - bclr a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - bexti a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - bext a0, a0, a0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 012 diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-arithmetic.s 
b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-arithmetic.s new file mode 100644 index 0000000000000..570456c7a1a7a --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-arithmetic.s @@ -0,0 +1,6838 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-ax45mpv -iterations=1 -instruction-tables=full < %s | FileCheck %s + +# Basic arithmetic operations + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, 
tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu 
+vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vadc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vadc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m8, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m1, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m2, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m4, tu, mu +vadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m8, tu, mu +vadc.vim v8, v8, 12, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, 
m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vsbc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vsbc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, 
mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli 
x28, x0, e16, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.vv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.vv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.vx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.vx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vaaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vaaddu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu 
+vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vaaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vaaddu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vaadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vaadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vaadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vaadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vasubu.vv v8, v8, v8 
+vsetvli x28, x0, e8, mf4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vasubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vasubu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vasubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vasubu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, 
m4, tu, mu +vasub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vasub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vasub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vasub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vim v8, v8, 12, v0 
+vsetvli x28, x0, e32, m8, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vim v8, v8, 12, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vim v8, v8, 12, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vx v8, 
v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmadc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmadc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli 
x28, x0, e16, mf4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vvm v8, v8, v8, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vvm v8, v8, v8, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, mf8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e8, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, mf4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e16, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, mf2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e32, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m1, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m2, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m4, tu, mu +vmsbc.vxm v8, v8, x30, v0 +vsetvli x28, x0, e64, m8, tu, mu +vmsbc.vxm v8, v8, x30, v0 + +vsetvli x28, x0, e8, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu 
+vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vrsub.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vrsub.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vrsub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vrsub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vi v8, v8, 12 + +vsetvli x28, x0, e8, 
mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsaddu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsaddu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e32, m8, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vi 
v8, v8, 12 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vi v8, v8, 12 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vi v8, v8, 12 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vsadd.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vsadd.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu 
+vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vssubu.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vssubu.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vssubu.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vssubu.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, mf8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e8, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, mf4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e16, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, mf2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e32, m8, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m1, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m2, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m4, tu, mu +vssub.vv v8, v8, v8 +vsetvli x28, x0, e64, m8, tu, mu +vssub.vv v8, v8, v8 + +vsetvli x28, x0, e8, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e8, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e16, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m1, tu, mu +vssub.vx v8, v8, x30 
+vsetvli x28, x0, e32, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e32, m8, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m1, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m2, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m4, tu, mu +vssub.vx v8, v8, x30 +vsetvli x28, x0, e64, m8, tu, mu +vssub.vx v8, v8, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwaddu.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwaddu.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwadd.wx v8, 
v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwadd.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwadd.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsubu.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsubu.wx v8, v16, x30 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, mf2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.wv v8, v16, v24 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.wv v8, v16, v24 + +vsetvli x28, x0, e8, mf2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, mf4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, mf8, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e8, m4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, mf2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, mf4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e16, m4, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, mf2, 
tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m1, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m2, tu, mu +vwsub.wx v8, v16, x30 +vsetvli x28, x0, e32, m4, tu, mu +vwsub.wx v8, v16, x30 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Andes45ALU:2 +# CHECK-NEXT: [1] - Andes45CSR:1 +# CHECK-NEXT: [2] - Andes45FDIV:1 +# CHECK-NEXT: [3] - Andes45FMAC:1 +# CHECK-NEXT: [4] - Andes45FMISC:1 +# CHECK-NEXT: [5] - Andes45FMV:1 +# CHECK-NEXT: [6] - Andes45LSU:1 +# CHECK-NEXT: [7] - Andes45MDU:1 +# CHECK-NEXT: [8] - Andes45VALU:1 +# CHECK-NEXT: [9] - Andes45VDIV:1 +# CHECK-NEXT: [10] - Andes45VFDIV:1 +# CHECK-NEXT: [11] - Andes45VFMIS:1 +# CHECK-NEXT: [12] - Andes45VLSU:1 +# CHECK-NEXT: [13] - Andes45VMAC:1 +# CHECK-NEXT: [14] - Andes45VMASK:1 +# CHECK-NEXT: [15] - Andes45VPERMUT:1 +# CHECK-NEXT: [16] - Andes45VPU:8 Andes45VALU, Andes45VMAC, Andes45VFMIS, Andes45VPERMUT, Andes45VDIV, Andes45VFDIV, Andes45VMASK, Andes45VLSU + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) +# CHECK-NEXT: [7]: Bypass Latency +# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])