diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 156c48e02abf9..e76204f552251 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -837,7 +837,6 @@ include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" include "AArch64SchedAmpere1.td" -include "AArch64SchedAmpere1B.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" @@ -1723,7 +1722,7 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, [TuneAmpere1A]>; -def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, +def : ProcessorModel<"ampere1b", Ampere1Model, ProcessorFeatures.Ampere1B, [TuneAmpere1B]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td deleted file mode 100644 index 43da76207ff7d..0000000000000 --- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td +++ /dev/null @@ -1,1061 +0,0 @@ -//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the Ampere Computing Ampere-1B to -// support instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -// The Ampere-1 core is an out-of-order micro-architecture. The front -// end has branch prediction, with a 10-cycle recovery time from a -// mispredicted branch. 
Instructions coming out of the front end are -// decoded into internal micro-ops (uops). - -def Ampere1BModel : SchedMachineModel { - let IssueWidth = 4; // 4-way decode and dispatch - let MicroOpBufferSize = 192; // micro-op re-order buffer size - let LoadLatency = 3; // Optimistic load latency - let MispredictPenalty = 10; // Branch mispredict penalty - let LoopMicroOpBufferSize = 32; // Instruction queue size - let CompleteModel = 0; - - list UnsupportedFeatures = !listconcat(SVEUnsupported.F, - SMEUnsupported.F, - PAUnsupported.F); -} - -let SchedModel = Ampere1BModel in { - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Ampere-1. -// Ampere-1 has 12 pipelines that 8 independent scheduler (4 integer, 2 FP, -// and 2 memory) issue into. The integer and FP schedulers can each issue -// one uop per cycle, while the memory schedulers can each issue one load -// and one store address calculation per cycle. - -def Ampere1BUnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w -def Ampere1BUnitB : ProcResource<2>; // integer single-cycle, and complex shifts -def Ampere1BUnitBS : ProcResource<1>; // integer multi-cycle -def Ampere1BUnitL : ProcResource<2>; // load -def Ampere1BUnitS : ProcResource<2>; // store address calculation -def Ampere1BUnitX : ProcResource<1>; // FP and vector operations, and flag write -def Ampere1BUnitY : ProcResource<1>; // FP and vector operations, and crypto -def Ampere1BUnitZ : ProcResource<1>; // FP store data and FP-to-integer moves - -def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>; -def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>; - -//===----------------------------------------------------------------------===// -// Define customized scheduler read/write types specific to the Ampere-1. 
- -def Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> { - let Latency = 1; - let NumMicroOps = 1; -} - -def Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> { - let Latency = 2; - let NumMicroOps = 1; -} - -def Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB, - Ampere1BUnitS, - Ampere1BUnitAB]> { - let Latency = 2; - let NumMicroOps = 3; -} - -def Ampere1BWrite_2cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { - let Latency = 2; - let NumMicroOps = 1; -} - -def Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> { 
- let Latency = 2; - let NumMicroOps = 2; -} - -def Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> { - let Latency = 3; - let NumMicroOps = 1; -} - -def Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, - Ampere1BUnitZ]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 3; - let NumMicroOps = 4; -} - -def Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { - let Latency = 4; - let NumMicroOps = 1; -} - -def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1BWrite_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1BUnitXY, - Ampere1BUnitS - Ampere1BUnitZ]> { - let Latency = 4; - let NumMicroOps = 3; 
-} - -def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1BWrite_5cyc_4S_4Z : SchedWriteRes<[Ampere1BUnitL, - Ampere1BUnitBS]> { - let Latency = 5; - let NumMicroOps = 8; -} - -def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL, - Ampere1BUnitBS]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def Ampere1BWrite_5cyc_3L : SchedWriteRes<[Ampere1BUnitL, - Ampere1BUnitL, - Ampere1BUnitL]> { - let Latency = 5; - let NumMicroOps = 3; -} - -def Ampere1BWrite_5cyc_4L : SchedWriteRes<[Ampere1BUnitL, - Ampere1BUnitL, - Ampere1BUnitL, - Ampere1BUnitL]> { - let Latency = 5; - let NumMicroOps = 4; -} - -def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 5; - let NumMicroOps = 1; -} - -def Ampere1BWrite_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 5; - let NumMicroOps = 6; -} - -def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1BWrite_6cyc_1BS_2A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA, - Ampere1BUnitA]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitXY]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 6; - let NumMicroOps = 6; -} - -def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS, - 
Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 6; - let NumMicroOps = 9; -} - -def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 7; - let NumMicroOps = 12; -} - -def Ampere1BWrite_8cyc_1BS_1L : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitXY, Ampere1BUnitXY, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitS, Ampere1BUnitS, - Ampere1BUnitZ, Ampere1BUnitZ, - Ampere1BUnitZ, Ampere1BUnitZ]> { - let Latency = 9; - let NumMicroOps = 14; -} - -def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> { - let Latency = 9; - let NumMicroOps = 3; -} - -def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> { - let Latency = 9; - let NumMicroOps = 3; -} - -def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 9; - let NumMicroOps = 1; -} - -def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, 
Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 9; - let NumMicroOps = 3; -} - -def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 12; - let NumMicroOps = 1; -} - -def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> { - let Latency = 13; - let NumMicroOps = 2; -} - -def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 17; - let NumMicroOps = 1; -} - -def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS, - Ampere1BUnitBS, - Ampere1BUnitX]> { - let Latency = 13; - let NumMicroOps = 3; -} - -def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 19; - let NumMicroOps = 1; -} - -def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 21; - let NumMicroOps = 1; -} - -def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 33; - let NumMicroOps = 1; -} - -def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 39; - let NumMicroOps = 1; -} - -def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { - let Latency = 63; - let NumMicroOps = 1; -} - -// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4), -// which are a single uop, and for extended registers, which have full flexibility -// across Unit A or B for both uops. -def Ampere1BWrite_Arith : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; - -def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latencies for Ampere-1. -// This provides a coarse model, which is then specialised below. 
- -def : WriteRes; // MOVN, MOVZ -def : WriteRes; // ALU -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} // ALU of Shifted-Reg -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} // ALU of Extended-Reg -def : WriteRes; // EXTR shifts a reg pair -def : WriteRes; // Shift/Scale -def : WriteRes { - let Latency = 13; -} // 32-bit Divide -def : WriteRes { - let Latency = 19; -} // 64-bit Divide -def : WriteRes { - let Latency = 3; -} // 32-bit Multiply -def : WriteRes { - let Latency = 3; -} // 64-bit Multiply -def : WriteRes; -def : WriteRes; -def : WriteRes { - let Latency = 3; -} // Load from base addr plus immediate offset -def : WriteRes { - let Latency = 1; -} // Store to base addr plus immediate offset -def : WriteRes { - let Latency = 1; - let NumMicroOps = 1; -} // Store a register pair. -def : WriteRes; -def : WriteRes { - let Latency = 3; - let NumMicroOps = 1; -} // Load from a register index (maybe scaled). -def : WriteRes { - let Latency = 1; - let NumMicroOps = 2; -} // Store to a register index (maybe scaled). -def : WriteRes { - let Latency = 2; -} // General floating-point ops. -def : WriteRes { - let Latency = 3; -} // Floating-point compare. -def : WriteRes { - let Latency = 3; -} // Float conversion. -def : WriteRes { -} // Float-int register copy. -def : WriteRes { - let Latency = 2; -} // Float-int register copy. -def : WriteRes { - let Latency = 4; -} // Floating-point multiply. -def : WriteRes { - let Latency = 19; -} // Floating-point division. -def : WriteRes { - let Latency = 3; -} // 64bit Vector D ops. -def : WriteRes { - let Latency = 3; -} // 128bit Vector Q ops. -def : WriteRes { - let Latency = 4; -} // Vector loads. -def : WriteRes { - let Latency = 2; -} // Vector stores. 
- -def : WriteRes { let Unsupported = 1; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { - let Latency = 3; -} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP - -// Forwarding logic. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -//===----------------------------------------------------------------------===// -// Specialising the scheduling model further for Ampere-1B. - -def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>; - -// Branch instructions -def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>; -def : InstRW<[Ampere1BWrite_1cyc_1A], - (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; -def : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>; - -// Common Short Sequence Compression (CSSC) -def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs ABS)>; -def : InstRW<[Ampere1BWrite_1cyc_1BS], (instrs CNT)>; -def : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instrs SMAX, SMIN)>; -def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs CTZ)>; -def : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instrs UMAX, USMIN)>; - -// Cryptography instructions -// -- AES encryption/decryption -def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AES[DE]")>; -def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>; -// -- Polynomial multiplication -def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; -// -- SHA-256 hash -def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>; -// -- SHA-256 schedule update -def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>; -// -- SHA-3 instructions -def : InstRW<[Ampere1BWrite_2cyc_1XY], - (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; -// -- SHA-512 hash -def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>; -// -- SHA-512 schedule 
update -def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>; -// -- SHA1 choose/majority/parity -def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA1[CMP]")>; -// -- SHA1 hash/schedule update -def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>; -def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>; -// -- SM3 hash -def : InstRW<[Ampere1BWrite_2cyc_1XY], - (instregex "^SM3PARTW[12]$", "^SM3SS1$", "^SM3TT[12][AB]$"0)>; -def : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>; - -// FP and vector load instructions -// -- Load 1-element structure to one/all lanes -// ---- all lanes -def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], - (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// ---- one lane -def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], - (instregex "^LD1i(8|16|32|64)")>; -// -- Load 1-element structure to one/all lanes, 1D size -def : InstRW<[Ampere1BWrite_4cyc_1L], - (instregex "^LD1Rv1d")>; -// -- Load 1-element structures to 1 register -def : InstRW<[Ampere1BWrite_4cyc_1L], - (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 2 registers -def : InstRW<[Ampere1BWrite_4cyc_2L], - (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 3 registers -def : InstRW<[Ampere1BWrite_5cyc_3L], - (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 1-element structures to 4 registers -def : InstRW<[Ampere1BWrite_5cyc_4L], - (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Load 2-element structure to all lanes of 2 registers, 1D size -def : InstRW<[Ampere1BWrite_4cyc_2L], - (instregex "^LD2Rv1d")>; -// -- Load 2-element structure to all lanes of 2 registers, other sizes -def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], - (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 2-element structure to one lane of 2 registers -def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], - (instregex "^LD2i(8|16|32|64)")>; -// -- Load 2-element structures to 2 registers, 
16B/8H/4S/2D size -def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], - (instregex "^LD2Twov(16b|8h|4s|2d)")>; -// -- Load 2-element structures to 2 registers, 8B/4H/2S size -def : InstRW<[Ampere1BWrite_8cyc_2L_3XY], - (instregex "^LD2Twov(8b|4h|2s)")>; -// -- Load 3-element structure to all lanes of 3 registers, 1D size -def : InstRW<[Ampere1BWrite_5cyc_3L], - (instregex "^LD3Rv1d")>; -// -- Load 3-element structure to all lanes of 3 registers, other sizes -def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], - (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 3-element structure to one lane of 3 registers -def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], - (instregex "^LD3i(8|16|32|64)")>; -// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes -def : InstRW<[Ampere1BWrite_8cyc_3L_3XY], - (instregex "^LD3Threev(16b|8h|4s)")>; -// -- Load 3-element structures to 3 registers, 2D size -def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], - (instregex "^LD3Threev2d")>; -// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1BWrite_9cyc_3L_3XY], - (instregex "^LD3Threev(8b|4h|2s)")>; -// -- Load 4-element structure to all lanes of 4 registers, 1D size -def : InstRW<[Ampere1BWrite_5cyc_4L], - (instregex "^LD4Rv1d")>; -// -- Load 4-element structure to all lanes of 4 registers, other sizes -def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], - (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; -// -- Load 4-element structure to one lane of 4 registers -def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], - (instregex "^LD4i(8|16|32|64)")>; -// -- Load 4-element structures to 4 registers, 2D size -def : InstRW<[Ampere1BWrite_8cyc_4L_4XY], - (instregex "^LD4Fourv2d")>; -// -- Load 4-element structures to 4 registers, 2S size -def : InstRW<[Ampere1BWrite_11cyc_4L_8XY], - (instregex "^LD4Fourv2s")>; -// -- Load 4-element structures to 4 registers, other sizes -def : InstRW<[Ampere1BWrite_10cyc_4L_8XY], - (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; -// -- Load pair, Q-form -def : 
InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>; -// -- Load pair, S/D-form -def : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; -// -- Load register -def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>; -// -- Load register, sign-extended register -def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; - -// FP and vector store instructions -// -- Store 1-element structure from one lane of 1 register -def : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z], - (instregex "^ST1i(8|16|32|64)")>; -// -- Store 1-element structures from 1 register -def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 2 registers -def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 3 registers -def : InstRW<[Ampere1BWrite_4cyc_3S_3Z], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 1-element structures from 4 registers -def : InstRW<[Ampere1BWrite_5cyc_4S_4Z], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 2-element structure from one lane of 2 registers -def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], - (instregex "^ST2i(8|16|32|64)")>; -// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes -def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], - (instregex "^ST2Twov(16b|8h|4s|2d)")>; -// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z], - (instregex "^ST2Twov(8b|4h|2s)")>; -// -- Store 3-element structure from one lane of 3 registers -def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], - (instregex "^ST3i(8|16|32|64)")>; -// -- Store 3-element structures from 3 registers -def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], - (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; -// -- Store 4-element structure from one lane of 4 registers -def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], - (instregex 
"^ST4i(8|16|32|64)")>; -// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes -def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], - (instregex "^ST4Fourv(16b|8h|4s)")>; -// -- Store 4-element structures from 4 registers, 2D sizes -def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], - (instregex "^ST4Fourv2d")>; -// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes -def : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z], - (instregex "^ST4Fourv(8b|4h|2s)")>; -// -- Store pair, Q-form -def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>; -// -- Store pair, S/D-form -def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>; -// -- Store register -def : InstRW<[Ampere1BWrite_2cyc_1S_2Z], (instregex "^STU?R[BHSDQ](ui|i)")>; -// -- Store register, sign-extended register offset -def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; - -// FP data processing, bfloat16 format -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>; -def : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>; -def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>; - -// FP data processing, scalar/vector, half precision -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; -def : InstRW<[Ampere1BWrite_3cyc_1X], - (instregex "^FCMPE?H")>; -def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], - (instregex "^FCCMPE?H")>; -def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], - (instregex "^FCSELH")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; -// Convert FP to integer, H-form -def : 
InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>; -// Convert to FP from GPR, H-form -def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>; -// Convert to FP from GPR, fixed-point, H-form -def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>; -def : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>; -def : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; -def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; -def : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; -// FP square root, H-form -def : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>; -// FP square root, vector-form, F16 -def : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>; - -// FP data processing, scalar/vector, single/double precision -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], - (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1X], - (instregex "^FCMPE?(S|D)")>; -def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], - (instregex "^FCCMPE?(S|D)")>; -def : 
InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], - (instregex "^FCSEL(S|D)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; -// Convert FP to integer, S/D-form -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>; -// Convert to FP from GPR, S/D-form -def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>; -// Convert to FP from GPR, fixed-point, S/D-form -def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>; -def : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>; -def : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?MUL")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; -def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>; -def : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>; -def : InstRW<[Ampere1BWrite_33cyc_1X], (instregex "^FSQRTv.f32", "^FSQRTSr")>; - -// FP miscellaneous instructions -def : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>; -def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; -def : 
InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>;
-def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>;
-def : InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>;
-def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
-def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
-def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>;
-def : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
-
-// Integer arithmetic and logical instructions
-def : InstRW<[Ampere1BWrite_1cyc_1A],
-             (instregex "ADC(W|X)r", "SBC(W|X)r")>;
-def : InstRW<[Ampere1BWrite_Arith],
-             (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>;
-def : InstRW<[Ampere1BWrite_1cyc_1AB],
-             (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>;
-def : InstRW<[Ampere1BWrite_ArithFlagsetting],
-             (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>;
-def : InstRW<[Ampere1BWrite_1cyc_1A],
-             (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>;
-def : InstRW<[Ampere1BWrite_1cyc_1A],
-             (instregex "(ADC|SBC)S[WX]r")>;
-def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>;
-def : InstRW<[Ampere1BWrite_1cyc_1A],
-             (instregex "(CCMN|CCMP)(X|W)")>;
-def : InstRW<[Ampere1BWrite_1cyc_1A],
-             (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
-def : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>;
-def : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>;
-def : InstRW<[Ampere1BWrite_3cyc_1BS],
-             (instregex "(S|U)MULHr")>;
-def : InstRW<[Ampere1BWrite_4cyc_1BS_1AB],
-             (instregex "(S|U)?M(ADD|SUB)L?r")>;
-
-// Integer load instructions
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDR(B|D|H|Q|S)ui")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDR(D|Q|W|X)l")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDTR(B|H|W|X)i")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDUR(BB|HH|X|W)i")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDURS(BW|BX|HW|HX|W)i")>;
-def : InstRW<[Ampere1BWrite_3cyc_1L],
-             (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
-// NOTE(review): this list previously named PRFUMi twice; the duplicate is
-// dropped (same instruction set). PRFMui may have been intended -- confirm it
-// is not mapped elsewhere before adding it.
-def : InstRW<[Ampere1BWrite_1cyc_1L],
-             (instrs PRFMl, PRFUMi)>;
-def : InstRW<[Ampere1BWrite_1cyc_1L],
-             (instrs PRFMroW, PRFMroX)>;
-
-// Integer miscellaneous instructions
-def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs ADR, ADRP)>;
-def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "EXTR(W|X)")>;
-def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>;
-def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
-def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "CLS(W|X)")>;
-def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs SETF8, SETF16)>;
-def : InstRW<[Ampere1BWrite_1cyc_1AB],
-             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
-def : InstRW<[Ampere1BWrite_1cyc_1B],
-             (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
-def : InstRW<[Ampere1BWrite_1cyc_1B],
-             (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
-
-// Integer store instructions
-def : InstRW<[Ampere1BWrite_1cyc_2S], (instregex "STNP(X|W)i")>;
-def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STPXi)>;
-def : InstRW<[Ampere1BWrite_2cyc_1B_1S], (instrs STPWi)>;
-def : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>;
-def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
-// The STUR pattern was listed twice; one copy is enough (same matches).
-def : InstRW<[Ampere1BWrite_1cyc_1S], (instregex "STUR(BB|HH|X|W)i",
-                                                  "STR(X|W)ui")>;
-def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroX, STRXroX)>;
-def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroW, STRXroW)>;
-
-// Memory tagging
-
-// Insert Random Tags
-def : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>;
-// Load allocation tag
-def : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>;
-// Store allocation tags
-def : InstRW<[Ampere1BWrite_1cyc_1S],
-             (instrs STGi, STGM, STGPreIndex, STGPostIndex)>;
-// Store allocation tags and pair of registers
-def : InstRW<[Ampere1BWrite_1cyc_2S],
-             (instrs STGPi, STGPpre, STGPpost)>;
-// Store allocation tags and zero data
-def : InstRW<[Ampere1BWrite_1cyc_1S],
-             (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>;
-// Store two tags
-def : InstRW<[Ampere1BWrite_1cyc_2S],
-             (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>;
-// Store two tags and zero data
-def : InstRW<[Ampere1BWrite_1cyc_2S],
-             (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>;
-// Subtract Pointer
-def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>;
-// Subtract Pointer, flagset
-def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>;
-// Insert Tag Mask
-def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>;
-// Arithmetic, immediate to logical address tag
-// NOTE(review): was Ampere1BWrite_1cyc_B, which does not follow the
-// <latency>cyc_<count><unit> naming used by every other write type here;
-// switched to the 1B form already used for EXTR/BFM/CLS -- confirm.
-def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>;
-
-// Pointer authentication
-def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>;
-def : InstRW<[Ampere1BWrite_6cyc_1BS_1A],
-             (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
-def : InstRW<[Ampere1BWrite_6cyc_1BS_2A],
-             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
-def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>;
-def : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
-def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>;
-
-// Vector integer instructions
-// -- absolute difference
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
-                        "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
-// -- arithmetic
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
-                        "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
-                        "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
-// -- arithmetic, horizontal, 16B
-def : InstRW<[Ampere1BWrite_8cyc_4XY],
-             (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
-def : InstRW<[Ampere1BWrite_8cyc_4XY],
-             (instregex "^[SU](MIN|MAX)Vv16i8v")>;
-// -- arithmetic, horizontal, 4H/4S
-def : InstRW<[Ampere1BWrite_4cyc_2XY],
-             (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
-def : InstRW<[Ampere1BWrite_4cyc_2XY],
-             (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
-// -- arithmetic, horizontal, 8B/8H
-def : InstRW<[Ampere1BWrite_6cyc_3XY],
-             (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
-def : InstRW<[Ampere1BWrite_6cyc_3XY],
-             (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
-// -- arithmetic, narrowing
-def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
-def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
-// -- arithmetic, pairwise
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
-// -- arithmetic, saturating
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
-// -- bit count
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^(CLS|CLZ|CNT)v")>;
-// -- compare
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
-                        "^CMHIv", "^CMHSv")>;
-// -- compare non-zero
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^CMTSTv")>;
-// -- dot product
-def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
-// -- fp reciprocal estimate
-def : InstRW<[Ampere1BWrite_6cyc_1X], (instregex "^FRECPEv", "^FRSQRTEv")>;
-// -- integer reciprocal estimate
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
-// -- logical
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
-// -- logical, narrowing
-def : InstRW<[Ampere1BWrite_6cyc_2XY],
-             (instregex "RSHRNv",
-                        "SHRNv", "SQSHRNv", "SQSHRUNv",
-                        "UQXTNv")>;
-// -- matrix multiply
-def : InstRW<[Ampere1BWrite_3cyc_1XY],
-             (instrs SMMLA, UMMLA, USMMLA)>;
-// The records from here to "shift, saturating", and the transpose/zip records
-// at the end, previously named Ampere1Write_* types. Those are presumably the
-// Ampere-1 model's write types (they resolve only because the Ampere-1
-// scheduling file is included first); inside this SchedModel = Ampere1BModel
-// scope they must be the Ampere1BWrite_* equivalents.
-// -- max/min
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
-// -- move immediate
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
-// -- multiply
-def : InstRW<[Ampere1BWrite_3cyc_1XY],
-             (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
-// -- multiply accumulate
-def : InstRW<[Ampere1BWrite_3cyc_1XY],
-             (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
-// -- negation, saturating
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
-// -- reverse bits/bytes
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
-// -- shift
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
-// -- shift and accumulate
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
-// -- shift, saturating
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
-                        "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
-                        "^UQSHL")>;
-
-// Vector miscellaneous instructions
-// -- duplicate element
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^DUPv.+lane")>;
-// -- duplicate from GPR
-def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^DUPv.+gpr")>;
-// -- extract narrow
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^XTNv")>;
-// -- insert/extract element
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
-// -- move FP immediate
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOVv")>;
-// -- move element to GPR
-def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "(S|U)MOVv")>;
-// -- move from GPR to any element
-def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
-// -- table lookup
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
-def : InstRW<[Ampere1BWrite_4cyc_2XY],
-             (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
-def : InstRW<[Ampere1BWrite_6cyc_3XY],
-             (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
-def : InstRW<[Ampere1BWrite_8cyc_4XY],
-             (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
-// -- transpose
-def : InstRW<[Ampere1BWrite_2cyc_1XY],
-             (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
-// -- zip/unzip
-def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;
-
-} // SchedModel = Ampere1BModel