diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 8792aa618ae05..5c155c936da9f 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -624,14 +624,14 @@ def : SchedAlias; // Load pair, immed offset def : SchedAlias; def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>; -def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z, WriteAdr], +def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWpost, LDPWpre)>; // Load pair, signed immed offset, signed words def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>; // Load pair, immed post or pre-index, signed words -def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr], +def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWpost, LDPSWpre)>; @@ -735,7 +735,7 @@ def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$", // Load vector reg, immed post-index // Load vector reg, immed pre-index -def : InstRW<[V1Write_6c_1L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_1L], (instregex "^LDR[BHSDQ](post|pre)$")>; // Load vector reg, register offset, basic @@ -756,12 +756,12 @@ def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>; // Load vector pair, immed post-index, S/D-form // Load vector pair, immed pre-index, S/D-form -def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDP[SD](pre|post)$")>; // Load vector pair, immed post-index, Q-form // Load vector pair, immed pre-index, Q-form -def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi], (instrs LDPQpost, LDPQpre)>; @@ -773,7 +773,7 @@ def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; // Store vector reg, immed post-index, B/H/S/D/Q-form // Store vector reg, immed pre-index, B/H/S/D/Q-form -def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ](pre|post)$")>; // Store vector reg, unsigned immed, B/H/S/D/Q-form @@ -798,12 +798,12 @@ def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>; // Store vector pair, immed post-index, S/D-form // Store vector pair, immed pre-index, S/D-form -def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], (instregex "^STP[SD](pre|post)$")>; // Store vector pair, immed post-index, Q-form // Store vector pair, immed pre-index, Q-form -def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>; // ASIMD integer instructions @@ -1074,31 +1074,31 @@ def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; // ASIMD load, 1 element, multiple, 1 reg def : InstRW<[V1Write_6c_1L], (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_6c_1L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_1L], (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg def : InstRW<[V1Write_6c_2L], (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_6c_2L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_2L], (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg def : InstRW<[V1Write_6c_3L], (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_6c_3L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_3L], (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form def : InstRW<[V1Write_6c_2L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V1Write_6c_2L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_2L], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, Q-form def : InstRW<[V1Write_7c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_7c_4L, WriteAdr], +def : InstRW<[WriteAdr, V1Write_7c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane @@ -1106,20 +1106,20 @@ def : InstRW<[V1Write_7c_4L, WriteAdr], def : InstRW<[V1Write_8c_1L_1V], (instregex "^LD1(i|Rv)(8|16|32|64)$", "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_8c_1L_1V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$", "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form def : InstRW<[V1Write_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)$")>; -def : InstRW<[V1Write_8c_1L_2V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>; // ASIMD load, 2 element, multiple, Q-form def : InstRW<[V1Write_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_8c_2L_2V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane @@ -1127,7 +1127,7 @@ def : InstRW<[V1Write_8c_2L_2V, WriteAdr], def : InstRW<[V1Write_8c_1L_2V], (instregex "^LD2i(8|16|32|64)$", "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_8c_1L_2V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$", "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; @@ -1138,7 +1138,7 @@ def : InstRW<[V1Write_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)$", "^LD3i(8|16|32|64)$", "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_8c_2L_3V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$", "^LD3i(8|16|32|64)_POST$", "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; @@ -1146,7 +1146,7 @@ def : InstRW<[V1Write_8c_2L_3V, WriteAdr], // ASIMD load, 3 element, multiple, Q-form def : InstRW<[V1Write_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_8c_3L_3V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form @@ -1156,7 +1156,7 @@ def : InstRW<[V1Write_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)$", "^LD4i(8|16|32|64)$", "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; -def : InstRW<[V1Write_8c_3L_4V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$", "^LD4i(8|16|32|64)_POST$", "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; @@ -1164,7 +1164,7 @@ def : InstRW<[V1Write_8c_3L_4V, WriteAdr], // ASIMD load, 4 element, multiple, Q-form def : InstRW<[V1Write_9c_4L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_9c_4L_4V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_9c_4L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; @@ -1176,7 +1176,7 @@ def : InstRW<[V1Write_9c_4L_4V, WriteAdr], def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$", "^ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$", "^ST1Twov(8b|4h|2s|1d)_POST$")>; @@ -1187,7 +1187,7 @@ def : InstRW<[V1Write_2c_2L01_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)$", "^ST1Threev(8b|4h|2s|1d)$", "^ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$", "^ST1Threev(8b|4h|2s|1d)_POST$", "^ST1Fourv(8b|4h|2s|1d)_POST$")>; @@ -1195,13 +1195,13 @@ def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr], // ASIMD store, 1 element, multiple, 3 reg, Q-form def : InstRW<[V1Write_2c_3L01_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, Q-form def : InstRW<[V1Write_2c_4L01_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane @@ -1211,7 +1211,7 @@ def : InstRW<[V1Write_4c_1L01_1V01], (instregex "^ST1i(8|16|32|64)$", "^ST2Twov(8b|4h|2s)$", "^ST2i(8|16|32|64)$")>; -def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01], (instregex "^ST1i(8|16|32|64)_POST$", "^ST2Twov(8b|4h|2s)_POST$", "^ST2i(8|16|32|64)_POST$")>; @@ -1225,7 +1225,7 @@ def : InstRW<[V1Write_4c_2L01_2V01], "^ST3Threev(8b|4h|2s)$", "^ST3i(8|16|32|64)$", "^ST4i64$")>; -def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$", "^ST3Threev(8b|4h|2s)_POST$", "^ST3i(8|16|32|64)_POST$", @@ -1234,31 +1234,31 @@ def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr], // ASIMD store, 3 element, multiple, Q-form def : InstRW<[V1Write_5c_3L01_3V01], (instregex "^ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, multiple, D-form def : InstRW<[V1Write_6c_3L01_3V01], (instregex "^ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, B/H/S def : InstRW<[V1Write_7c_6L01_6V01], (instregex "^ST4Fourv(16b|8h|4s)$")>; -def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, D def : InstRW<[V1Write_4c_4L01_4V01], (instrs ST4Fourv2d)>; -def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr], +def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01], (instrs ST4Fourv2d_POST)>; // ASIMD store, 4 element, one lane, B/H/S def : InstRW<[V1Write_6c_3L_3V], (instregex "^ST4i(8|16|32)$")>; -def : InstRW<[V1Write_6c_3L_3V, WriteAdr], +def : InstRW<[WriteAdr, V1Write_6c_3L_3V], (instregex "^ST4i(8|16|32)_POST$")>; diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s index 91adecac8d67e..264ad8bccc58e 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s @@ -1162,28 +1162,28 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1193,43 +1193,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.1 2.0 # CHECK: [1] Code Region - G02 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1239,43 +1239,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.1 2.0 # CHECK: [2] Code Region - G03 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] D========================eeeeeeER. ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1285,43 +1285,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.1 2.0 # CHECK: [3] Code Region - G04 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1900 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.63 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 3.74 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1331,43 +1331,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.1 2.0 # CHECK: [4] Code Region - G05 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1377,43 +1377,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 2.0 # CHECK: [5] Code Region - G06 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1423,43 +1423,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 2.0 # CHECK: [6] Code Region - G07 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2300 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.77 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 4.53 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1469,43 +1469,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 2.0 # CHECK: [7] Code Region - G08 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.83 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 4.92 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1515,43 +1515,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 2.0 # CHECK: [8] Code Region - G09 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.83 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 4.92 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeER. ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] .D=============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1561,43 +1561,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 2.0 # CHECK: [9] Code Region - G10 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3104 +# CHECK-NEXT: Total Cycles: 608 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.81 -# CHECK-NEXT: IPC: 0.32 +# CHECK-NEXT: uOps Per Cycle: 4.11 +# CHECK-NEXT: IPC: 1.64 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,9] .D==============================eER add x0, x27, #1 +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1607,43 +1607,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 2.1 # CHECK: [10] Code Region - G11 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3204 +# CHECK-NEXT: Total Cycles: 509 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: uOps Per Cycle: 4.72 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 4.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,5] D===================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==================eeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,7] .D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D========================eeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,9] .D===============================eER add x0, x27, #1 +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,5] D===eE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1653,43 +1653,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 2.2 # CHECK: [11] Code Region - G12 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3204 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.75 -# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: uOps Per Cycle: 4.72 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 4.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,1] D=======eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D=============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D=============eeeeeeeER . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,5] .D===================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===================eeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D=========================eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=========================eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D===============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1699,43 +1699,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 26.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 26.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.9 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.2 # CHECK: [12] Code Region - G13 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3504 +# CHECK-NEXT: Total Cycles: 1110 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.74 -# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: uOps Per Cycle: 2.34 +# CHECK-NEXT: IPC: 0.90 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345678 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=======eER . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D=============eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D=============eeeeeeeER . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,5] .D===================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===================eeeeeeeER. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D==========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D==========================eeeeeeeeER. ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,9] .D==================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,9] .D==========eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1745,22 +1745,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 20.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 27.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 17.4 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 9. 1 11.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 4.2 0.1 2.5 # CHECK: [13] Code Region - G14 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 @@ -1770,18 +1770,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1791,22 +1791,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.7 0.1 0.0 +# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 17.2 0.1 3.0 # CHECK: [14] Code Region - G15 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 @@ -1816,18 +1816,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1837,43 +1837,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.7 0.1 0.0 +# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 17.2 0.1 3.0 # CHECK: [15] Code Region - G16 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 1.66 +# CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1883,43 +1883,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 3.0 # CHECK: [16] Code Region - G17 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1929,43 +1929,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 3.0 # CHECK: [17] Code Region - G18 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1975,43 +1975,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 3.0 # CHECK: [18] Code Region - G19 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 4.71 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2021,43 +2021,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 3.0 # CHECK: [19] Code Region - G20 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2900 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.72 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 5.69 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2067,43 +2067,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [20] Code Region - G21 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2700 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 5.29 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2113,43 +2113,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 3.0 # CHECK: [21] Code Region - G22 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 3310 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.65 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 0.79 +# CHECK-NEXT: IPC: 0.30 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2159,22 +2159,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.5 0.1 0.0 +# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 17.0 0.1 3.0 # CHECK: [22] Code Region - G23 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 @@ -2184,18 +2184,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2205,43 +2205,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 17.1 0.1 3.0 # CHECK: [23] Code Region - G24 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 2603 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.62 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 0.96 +# CHECK-NEXT: IPC: 0.38 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 + +# CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,3] D=========eE------R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] D=================eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,7] .D=================eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: [0,9] .D==================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2251,43 +2251,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 7. 1 18.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 12.9 0.1 3.0 # CHECK: [24] Code Region - G25 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.62 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 4.90 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2297,43 +2297,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 3.0 # CHECK: [25] Code Region - G26 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.62 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 4.90 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2343,43 +2343,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 3.0 # CHECK: [26] Code Region - G27 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.70 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 5.49 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,5] D========================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER. ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,9] .D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2389,43 +2389,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 3.0 # CHECK: [27] Code Region - G28 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3700 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.92 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 7.25 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2435,43 +2435,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [28] Code Region - G29 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.95 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 7.45 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2481,43 +2481,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.1 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.6 0.1 3.0 # CHECK: [29] Code Region - G30 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 1910 # CHECK-NEXT: Total uOps: 3700 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.92 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: IPC: 0.52 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 + +# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,7] .D==========eE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,9] . D=================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2527,22 +2527,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 9. 1 18.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 6.9 0.1 3.0 # CHECK: [30] Code Region - G31 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 15 @@ -2552,18 +2552,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2573,22 +2573,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 16.7 0.1 3.0 # CHECK: [31] Code Region - G32 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 15 @@ -2598,18 +2598,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2619,43 +2619,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 16.7 0.1 3.0 # CHECK: [32] Code Region - G33 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.87 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 6.86 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2665,43 +2665,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [33] Code Region - G34 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.87 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 6.86 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2711,43 +2711,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [34] Code Region - G35 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.87 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 6.86 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D================eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER. ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2757,43 +2757,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [35] Code Region - G36 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4204 +# CHECK-NEXT: Total Cycles: 611 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.07 -# CHECK-NEXT: IPC: 0.24 +# CHECK-NEXT: uOps Per Cycle: 7.36 +# CHECK-NEXT: IPC: 1.64 # CHECK-NEXT: Block RThroughput: 5.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012345 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] .D================eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,5] . D=======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D======================eeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,7] . D==============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=============================eeeeeeeeeER. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,9] . D======================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2803,43 +2803,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 7. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 30.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 9. 1 39.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.2 3.2 # CHECK: [36] Code Region - G37 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4304 +# CHECK-NEXT: Total Cycles: 610 # CHECK-NEXT: Total uOps: 4800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.12 -# CHECK-NEXT: IPC: 0.23 +# CHECK-NEXT: uOps Per Cycle: 7.87 +# CHECK-NEXT: IPC: 1.64 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D===============eeeeeeeeeER . . . .. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,5] . D========================eER. . . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=======================eeeeeeeeeER . .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . D================================eER . .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===============================eeeeeeeeER. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] .D=eE-------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,5] . D=eE-------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE-------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2849,43 +2849,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 5. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 33.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 5. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.2 3.3 # CHECK: [37] Code Region - G38 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4304 +# CHECK-NEXT: Total Cycles: 660 # CHECK-NEXT: Total uOps: 4800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.12 -# CHECK-NEXT: IPC: 0.23 +# CHECK-NEXT: uOps Per Cycle: 7.27 +# CHECK-NEXT: IPC: 1.52 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeeER. . . . . .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D================eER . . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D===============eeeeeeeeER . . . .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . D=======================eER . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D======================eeeeeeeeeER . .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . D===============================eER . .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeeER. ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=======================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE-------R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2895,22 +2895,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 32.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 40.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 20.1 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.2 3.3 # CHECK: [38] Code Region - G39 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 @@ -2920,18 +2920,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2941,22 +2941,22 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.0 0.1 0.0 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [39] Code Region - G40 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 @@ -2966,18 +2966,18 @@ add x0, x27, 1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2987,43 +2987,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.0 0.1 0.0 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [40] Code Region - G41 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.12 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 2.36 +# CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,1] D=eE------R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D========eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=======eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,5] . D========eE------R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,7] . D========eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=======eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: [0,9] . D========eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3033,43 +3033,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.0 0.1 0.0 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 5. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 9. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 7.1 0.1 3.0 # CHECK: [41] Code Region - G42 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.12 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 8.82 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3079,43 +3079,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [42] Code Region - G43 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 4004 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.12 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 8.82 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D===============eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D======================eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D=============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D====================================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3125,43 +3125,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 37.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 19.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [43] Code Region - G44 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3604 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 3300 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.92 -# CHECK-NEXT: IPC: 0.28 +# CHECK-NEXT: uOps Per Cycle: 6.50 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D========eER . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D===============eER. . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D======================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D======================eeeeeeER . . ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] . D============================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeER. ldp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . D==================================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeE-R ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . D==eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ldp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3171,43 +3171,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 23.0 0.0 0.0 ldp s1, s2, [x27], #248 -# CHECK-NEXT: 7. 1 29.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ldp d1, d2, [x27], #496 -# CHECK-NEXT: 9. 1 35.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 18.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.1 0.1 2.8 # CHECK: [44] Code Region - G45 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2804 +# CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1700 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.61 -# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: uOps Per Cycle: 3.35 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . .. ldp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D======eER. . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . .. ldp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] D============eER . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . .. ldp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] D==================eER . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER .. ldp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] D========================eER .. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======================eeeeER. ldp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] .D===========================eER add x0, x27, #1 +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] D=eE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER.. ldp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] D==eE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER. ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeE-R ldp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3217,43 +3217,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 ldp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.1 2.0 # CHECK: [45] Code Region - G46 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1604 +# CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1900 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.18 -# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: uOps Per Cycle: 3.75 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . ldp x1, x2, [x27], #496 -# CHECK-NEXT: [0,1] D=eE--R . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . . . ldp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] D=====eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D=====eeeeER . . ldp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] D======eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eeeeeER . . ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: [0,7] D===========eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D==========eeeeeER. ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: [0,9] .D===============eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER .. ldp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER .. ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] D===eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeER. ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,7] D====eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeER ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3265,41 +3265,41 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ldp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 7.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: 7. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 11.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: 9. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 7.0 0.1 0.4 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.1 1.2 # CHECK: [46] Code Region - G47 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27], #254 -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ldr s1, [x27], #254 -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ldr d1, [x27], #254 -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] D========================eeeeeeER. ldr q1, [x27], #254 -# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254 +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27], #254 +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27], #254 +# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3309,43 +3309,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27], #254 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.1 2.0 # CHECK: [47] Code Region - G48 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27, #254]! -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27, #254]! -# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D============eeeeeeER . . . ldr s1, [x27, #254]! -# CHECK-NEXT: [0,5] D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D==================eeeeeeER . . ldr d1, [x27, #254]! -# CHECK-NEXT: [0,7] D========================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] D========================eeeeeeER. ldr q1, [x27, #254]! -# CHECK-NEXT: [0,9] D==============================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27, #254]! +# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27, #254]! +# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27, #254]! +# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3355,16 +3355,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27, #254]! -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldr s1, [x27, #254]! -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ldr d1, [x27, #254]! -# CHECK-NEXT: 7. 1 25.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ldr q1, [x27, #254]! -# CHECK-NEXT: 9. 1 31.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! +# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.1 2.0 # CHECK: [48] Code Region - G49 @@ -3508,28 +3508,27 @@ add x0, x27, 1 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1700 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.41 -# CHECK-NEXT: IPC: 1.42 +# CHECK-NEXT: uOps Per Cycle: 3.37 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . ldrsw x1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldrsw x1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeE-R . st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,7] D=====eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D====eeER. st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254 +# CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]! +# CHECK-NEXT: [0,5] D===eE--R add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeE-R st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,7] D====eE-R add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3545,37 +3544,36 @@ add x0, x27, 1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! # CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 7. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.7 0.1 0.7 +# CHECK-NEXT: 7. 1 5.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.1 0.8 # CHECK: [52] Code Region - G53 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.99 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eeER . st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeER. st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3585,43 +3583,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [53] Code Region - G54 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.99 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eeER . st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeER. st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3631,43 +3628,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [54] Code Region - G55 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.99 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eeER . st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeER. st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3677,43 +3673,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [55] Code Region - G56 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.39 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 4.76 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3723,43 +3718,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 0.0 # CHECK: [56] Code Region - G57 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.59 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.16 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3769,43 +3763,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 0.0 # CHECK: [57] Code Region - G58 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.59 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.16 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3815,43 +3808,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 0.0 # CHECK: [58] Code Region - G59 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 3.39 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 4.84 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3861,43 +3853,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.2 0.0 # CHECK: [59] Code Region - G60 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 3.59 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.12 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3907,43 +3898,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.2 0.0 # CHECK: [60] Code Region - G61 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 3.39 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 4.84 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3953,43 +3943,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.2 0.0 # CHECK: [61] Code Region - G62 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 704 # CHECK-NEXT: Total uOps: 3600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 3.59 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.11 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,9] . D========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,7] . D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3999,43 +3989,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.1 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.4 0.3 0.0 # CHECK: [62] Code Region - G63 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 4200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 4.18 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.22 +# CHECK-NEXT: IPC: 1.24 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D====eeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=======eER add x0, x27, #1 +# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] D=eER. .. add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] .D=eER .. add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,5] .D===eER .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,7] . D===eER .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4045,43 +4035,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 4.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.4 0.0 # CHECK: [63] Code Region - G64 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 3.78 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 5.41 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D===eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] .D=====eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D====eeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======eeER. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=======eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4091,43 +4080,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 4.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.2 0.0 # CHECK: [64] Code Region - G65 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1604 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.00 -# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: uOps Per Cycle: 4.53 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 5.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D==eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=eeER . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D===eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeeeER . . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,5] .D=======eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D======eeeeER. . st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,7] . D==========eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==========eeeeER. st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D==============eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D===eeeeER . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,5] .D====eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D===eeeeER. st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,7] . D====eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D====eeeeER st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4137,43 +4126,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 5. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 7. 1 11.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 11.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 15.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 6.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.5 0.3 0.6 # CHECK: [65] Code Region - G66 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D============eeeeER . . st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4183,43 +4172,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 1.0 # CHECK: [66] Code Region - G67 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.10 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.35 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D============eeeeER . . st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4229,43 +4218,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 1.0 # CHECK: [67] Code Region - G68 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.20 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.74 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4275,43 +4264,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.6 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 1.0 # CHECK: [68] Code Region - G69 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.30 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 5.14 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4321,43 +4310,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 1.0 # CHECK: [69] Code Region - G70 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.20 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.74 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===========eeeeER . . st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4367,43 +4356,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 1.0 # CHECK: [70] Code Region - G71 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D============eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4413,43 +4402,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 1.0 # CHECK: [71] Code Region - G72 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D========eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D============eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============eeeeER. st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] .D===================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4459,39 +4448,38 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 1.0 # CHECK: [72] Code Region - G73 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 1304 +# CHECK-NEXT: Total Cycles: 406 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.53 -# CHECK-NEXT: IPC: 0.46 +# CHECK-NEXT: uOps Per Cycle: 4.93 +# CHECK-NEXT: IPC: 1.48 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,1] D=====eER . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=====eeeeER .. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,3] D=========eER .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D========eeeeER. st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,5] .D============eER add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,5] .D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4501,39 +4489,39 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 7.5 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.5 0.3 1.2 # CHECK: [73] Code Region - G74 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2404 +# CHECK-NEXT: Total Cycles: 707 # CHECK-NEXT: Total uOps: 3800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.58 -# CHECK-NEXT: IPC: 0.42 +# CHECK-NEXT: uOps Per Cycle: 5.37 +# CHECK-NEXT: IPC: 1.41 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234567 - -# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,1] D=====eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=====eeeeER . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] D=========eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D========eeeeeER . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,5] .D=============eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=============eeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,7] . D=================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=================eeeeeER. st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D======================eER add x0, x27, #1 +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,5] .D===eE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,7] . D===eE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D====eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=====eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4543,43 +4531,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 5. 1 14.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 14.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 7. 1 18.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 18.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 11.9 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.4 0.3 1.4 # CHECK: [74] Code Region - G75 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2204 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.54 -# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: uOps Per Cycle: 4.82 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 - -# CHECK: [0,0] DeeeeER . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=======eeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D============eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D================eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===============eeeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D====================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE--R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D====eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4589,43 +4577,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 21.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.2 1.2 # CHECK: [75] Code Region - G76 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2104 +# CHECK-NEXT: Total Cycles: 606 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.52 -# CHECK-NEXT: IPC: 0.48 +# CHECK-NEXT: uOps Per Cycle: 5.28 +# CHECK-NEXT: IPC: 1.65 # CHECK-NEXT: Block RThroughput: 5.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234 - -# CHECK: [0,0] DeeeeeER . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=====eeeeER . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,3] D=========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D========eeeeER . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,5] .D============eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D============eeeeER. . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D================eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===============eeeeER. st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D===================eER add x0, x27, #1 +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeER .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,5] .D===eE--R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D====eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4635,43 +4623,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 5. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 17.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 16.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 11.1 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.3 0.2 1.1 # CHECK: [76] Code Region - G77 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.50 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 5.93 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeER . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=======eeeeER. . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===========eeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============eeeeER. st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,9] . D==================eER add x0, x27, #1 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4681,43 +4669,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 9. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.2 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.0 # CHECK: [77] Code Region - G78 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2204 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3600 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.63 -# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: uOps Per Cycle: 5.10 +# CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 - -# CHECK: [0,0] DeeeeER . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,3] D========eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=======eeeeER. . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D===========eER . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===========eeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,7] . D==============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============eeeeeeER. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,9] . D====================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] .D==eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,7] . D==eE--R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4727,43 +4715,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 3. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 12.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 7. 1 15.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 15.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 9. 1 21.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 10.3 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.6 0.1 1.2 # CHECK: [78] Code Region - G79 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3304 +# CHECK-NEXT: Total Cycles: 1205 # CHECK-NEXT: Total uOps: 5800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.76 -# CHECK-NEXT: IPC: 0.30 +# CHECK-NEXT: uOps Per Cycle: 4.81 +# CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 12.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,1] D======eER. . . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=====eeeeeeeER . . . .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] .D============eER . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D===========eeeeeeER . . .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,5] . D=================eER . . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D================eeeeeeeER . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,7] . D=======================eER. .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======================eeeeeeeER. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,9] . D=============================eER add x0, x27, #1 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] .D=eE-----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,5] . D==eE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,9] . D====eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4773,43 +4761,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 17.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 23.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.1 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.4 2.3 # CHECK: [79] Code Region - G80 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2904 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 4800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.65 -# CHECK-NEXT: IPC: 0.34 +# CHECK-NEXT: uOps Per Cycle: 4.77 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 9.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeER . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D====eER . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D===eeeeeeER . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=========eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=========eeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D==============eER. . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==============eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=====================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====================eeeeeeER. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D==========================eER add x0, x27, #1 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D===eeeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . D===eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D====eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4819,43 +4807,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 10.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 15.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 15.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 22.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 21.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 27.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.0 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.4 0.4 1.9 # CHECK: [80] Code Region - G81 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3204 +# CHECK-NEXT: Total Cycles: 807 # CHECK-NEXT: Total uOps: 5200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.62 -# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: uOps Per Cycle: 6.44 +# CHECK-NEXT: IPC: 1.24 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=======eER . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=============eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,5] . D==================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==================eeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,7] . D=======================eER. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=======================eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=============================eER add x0, x27, #1 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeER. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D===eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,7] . D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D===eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4865,43 +4853,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 14.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 5. 1 19.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 24.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.9 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.6 0.3 2.2 # CHECK: [81] Code Region - G82 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 4000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.33 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 7.87 +# CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123 -# CHECK-NEXT: Index 0123456789 0123456789 - -# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,3] .D===========eER . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D================eeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D======================eER . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D======================eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D===========================eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4911,41 +4899,41 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 17.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 23.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 23.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 28.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 14.8 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [82] Code Region - G83 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2004 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.40 -# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: uOps Per Cycle: 5.53 +# CHECK-NEXT: IPC: 1.58 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,1] D======eER. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D======eeeeeeER. . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D===========eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===========eeeeER . . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,5] .D===============eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===============eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===================eER add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,4] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,5] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,6] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4955,34 +4943,34 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 12.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 12.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 5. 1 16.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 16.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 20.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 11.4 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.3 1.5 # CHECK: [83] Code Region - G84 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 404 +# CHECK-NEXT: Total Cycles: 204 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.98 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 01234567 +# CHECK-NEXT: Index 012345 -# CHECK: [0,0] DeeER. . stp s1, s2, [x27], #248 -# CHECK-NEXT: [0,1] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER. stp d1, d2, [x27], #496 -# CHECK-NEXT: [0,3] D====eER add x0, x27, #1 +# CHECK: [0,0] DeeER. stp s1, s2, [x27], #248 +# CHECK-NEXT: [0,1] D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER stp d1, d2, [x27], #496 +# CHECK-NEXT: [0,3] D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4992,37 +4980,36 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp d1, d2, [x27], #496 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp d1, d2, [x27], #496 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.0 0.3 0.0 # CHECK: [84] Code Region - G85 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 904 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2200 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.43 -# CHECK-NEXT: IPC: 1.11 +# CHECK-NEXT: uOps Per Cycle: 4.37 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . stp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . stp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . stp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=====eeER . stp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] .D=======eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eER. stp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] .D========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . stp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . stp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. stp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eER. stp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5032,43 +5019,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 6.0 0.0 0.0 stp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 stp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 9.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.5 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 stp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.1 0.1 0.0 # CHECK: [85] Code Region - G86 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.84 -# CHECK-NEXT: IPC: 1.42 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER . str b1, [x27], #254 -# CHECK-NEXT: [0,7] .D====eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D====eeER. str h1, [x27], #254 -# CHECK-NEXT: [0,9] .D======eER add x0, x27, #1 +# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeER. str b1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER str h1, [x27], #254 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5084,37 +5070,36 @@ add x0, x27, 1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]! # CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27], #254 -# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 str h1, [x27], #254 -# CHECK-NEXT: 9. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.6 0.1 0.0 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27], #254 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [86] Code Region - G87 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 1.99 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . str s1, [x27], #254 -# CHECK-NEXT: [0,1] D==eER . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER . . str d1, [x27], #254 -# CHECK-NEXT: [0,3] D====eER . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER . . str q1, [x27], #254 -# CHECK-NEXT: [0,5] D======eER. . add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eeER . str b1, [x27, #254]! -# CHECK-NEXT: [0,7] .D=======eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=======eeER. str h1, [x27, #254]! -# CHECK-NEXT: [0,9] .D=========eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . str s1, [x27], #254 +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254 +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . str q1, [x27], #254 +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eeER. str b1, [x27, #254]! +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeER str h1, [x27, #254]! +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5124,43 +5109,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27], #254 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27], #254 -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str b1, [x27, #254]! -# CHECK-NEXT: 7. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 str h1, [x27, #254]! -# CHECK-NEXT: 9. 1 10.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.7 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27], #254 +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27, #254]! +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27, #254]! +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [87] Code Region - G88 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 804 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 2.49 -# CHECK-NEXT: IPC: 1.24 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 01 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. .. str s1, [x27, #254]! -# CHECK-NEXT: [0,1] D==eER .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D==eeER .. str d1, [x27, #254]! -# CHECK-NEXT: [0,3] D====eER .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D====eeER .. str q1, [x27, #254]! -# CHECK-NEXT: [0,5] D======eER.. add x0, x27, #1 -# CHECK-NEXT: [0,6] D======eER.. str w1, [x27], #254 -# CHECK-NEXT: [0,7] .D======eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D======eER. str x1, [x27], #254 -# CHECK-NEXT: [0,9] .D=======eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . str s1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]! +# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] D==eeER . str q1, [x27, #254]! +# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] D===eER . str w1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eER. str x1, [x27], #254 +# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5170,16 +5154,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]! -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str d1, [x27, #254]! -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 5.0 0.0 0.0 str q1, [x27, #254]! -# CHECK-NEXT: 5. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 0.0 0.0 str w1, [x27], #254 -# CHECK-NEXT: 7. 1 7.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 7.0 0.0 0.0 str x1, [x27], #254 -# CHECK-NEXT: 9. 1 8.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.3 0.1 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]! +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27, #254]! +# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str w1, [x27], #254 +# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str x1, [x27], #254 +# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.2 0.1 0.0 # CHECK: [88] Code Region - G89