diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td index 524fa33f498bb..50142afccd48d 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td @@ -15,7 +15,9 @@ //===----------------------------------------------------------------------===// def NeoverseN1Model : SchedMachineModel { - let IssueWidth = 8; // Maximum micro-ops dispatch rate. + let IssueWidth = 3; // Set to the decode bandwidth rather than the + // maximum micro-op dispatch rate; empirical + // measurements showed this value gives better results. let MicroOpBufferSize = 128; // NOTE: Copied from Cortex-A76. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 11; // Cycles cost of branch mispredicted. diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td index e44d40f8d7020..cd0d8a9186d5b 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// def NeoverseN3Model : SchedMachineModel { - let IssueWidth = 10; // Micro-ops dispatched at a time. + let IssueWidth = 5; // Micro-ops dispatched at a time. let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2. 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 44625a2034d9d..f28df44bfdb38 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -19,7 +19,9 @@ //===----------------------------------------------------------------------===// def NeoverseV1Model : SchedMachineModel { - let IssueWidth = 15; // Maximum micro-ops dispatch rate. + let IssueWidth = 8; // Set to the decode bandwidth rather than the + // maximum micro-op dispatch rate; empirical + // measurements showed this value gives better results. let MicroOpBufferSize = 256; // Micro-op re-order buffer. let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 11; // Cycles cost of branch mispredicted. diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s index 8fe21167a5bd3..127c8c30fc2c6 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s @@ -1165,10 +1165,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 01 @@ -1176,14 +1176,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. ld1 { v1.1d }, [x27], #8 # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eE---R. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER .. ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER.. ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,5] . D=eE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,7] . D=eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeER ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,9] . D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1194,15 +1194,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 9. 
1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.5 # CHECK: [1] Code Region - G02 @@ -1211,10 +1211,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 01 @@ -1222,14 +1222,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. ld1 { v1.8b }, [x27], #8 # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER .. ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER.. ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,5] . D=eE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeER ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1240,15 +1240,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 7. 
1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.5 # CHECK: [2] Code Region - G03 @@ -1257,10 +1257,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 01 @@ -1268,14 +1268,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. ld1 { v1.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeER.. ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER .. ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER.. ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeER ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . 
D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1286,42 +1286,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.5 # CHECK: [3] Code Region - G04 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 906 # CHECK-NEXT: Total uOps: 1900 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.75 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 4.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.10 +# CHECK-NEXT: IPC: 1.10 +# CHECK-NEXT: Block RThroughput: 6.3 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER.. 
ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER . . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER . ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . .DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1332,42 +1332,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.1 0.4 1.5 # CHECK: [4] Code Region - G05 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.94 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER.. ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeER . ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1377,43 +1377,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.5 +# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.5 # CHECK: [5] Code Region - G06 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.94 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER.. ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE---R.. 
add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeER . ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1423,43 +1423,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.5 +# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.5 # CHECK: [6] Code Region - G07 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 707 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 2300 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.25 -# CHECK-NEXT: IPC: 1.41 -# CHECK-NEXT: Block RThroughput: 6.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.28 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 7.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER . . ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . .. ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE---R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. .. ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE---R. .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,9] . . 
DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1469,43 +1469,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.2 1.8 +# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 9. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.8 # CHECK: [7] Code Region - G08 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 757 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.30 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.48 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.3 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE----R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . .. ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER .. ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,9] . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1515,43 +1515,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.0 +# CHECK-NEXT: 1. 
1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [8] Code Region - G09 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 757 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.30 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.48 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.3 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . .. ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER .. ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. 
ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1561,43 +1561,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.0 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [9] Code Region - G10 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 856 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 2700 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.15 -# CHECK-NEXT: IPC: 1.17 -# CHECK-NEXT: Block RThroughput: 8.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.68 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER .. ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,9] . . 
DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1607,25 +1607,25 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.0 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [10] Code Region - G11 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1006 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.98 # CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 10.0 @@ -1635,15 +1635,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeeER . .. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. .. 
ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eE----R. .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeER .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,5] . D===eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeER .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,7] . D===eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER .. ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,3] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,9] . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1653,25 +1653,25 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.5 2.0 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 
1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [11] Code Region - G12 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1006 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.98 # CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 10.0 @@ -1681,15 +1681,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. .. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE----R. .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeER .. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . D===eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeER .. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER .. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. 
ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1699,43 +1699,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.5 2.0 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [12] Code Region - G13 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1209 +# CHECK-NEXT: Total Cycles: 1210 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 2.32 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.31 # CHECK-NEXT: IPC: 0.83 -# CHECK-NEXT: Block RThroughput: 8.5 +# CHECK-NEXT: Block RThroughput: 9.3 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 - -# CHECK: [0,0] DeeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE----R. . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . D===eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=======eeeeeeeER ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,9] . D========eE-----R add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeER . . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE----R . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER. .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE----R. .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D====eeeeeeeER ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,9] . . 
D====eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1745,16 +1745,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 9. 1 9.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.7 0.3 2.1 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.8 0.4 2.1 # CHECK: [13] Code Region - G14 @@ -1763,25 +1763,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3503 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.29 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . 
ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,3] D========eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,5] .D==============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D=====================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==========================eeeeeeeER ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,9] . D===========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE-----R. . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . . . . ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D=====eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==========eeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,5] . D==========eE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D===============eeeeeeeER . . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,7] . . D===============eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D====================eeeeeeeER ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,9] . . D====================eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1791,16 +1791,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 5. 1 15.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 7. 1 22.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 9. 1 28.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 14.7 0.1 2.5 +# CHECK-NEXT: 1. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 3. 1 6.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: 5. 1 11.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 7. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 9. 1 21.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.0 0.1 2.5 # CHECK: [14] Code Region - G15 @@ -1809,25 +1809,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3503 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.29 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=======eeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,3] D========eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=============eeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,5] .D==============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D====================eeeeeeeER . . ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D=====================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==========================eeeeeeeER ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D===========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE-----R. . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,3] . D=====eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==========eeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,5] . D==========eE-----R . . . . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D===============eeeeeeeER . . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . D===============eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D====================eeeeeeeER ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . D====================eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1837,43 +1837,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 14.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 5. 1 15.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 21.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 7. 1 22.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 27.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 9. 1 28.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 14.7 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 3. 1 6.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: 5. 1 11.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 7. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 9. 
1 21.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.0 0.1 2.5 # CHECK: [15] Code Region - G16 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1103 +# CHECK-NEXT: Total Cycles: 1107 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.81 -# CHECK-NEXT: IPC: 0.91 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: IPC: 0.90 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1883,43 +1883,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [16] Code Region - G17 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.93 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
D==eeeeeeeER ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1929,43 +1929,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [17] Code Region - G18 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.93 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1975,43 +1975,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: 1. 
1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [18] Code Region - G19 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.11 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.58 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeER . ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] . D==eE-----R. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2021,43 +2021,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [19] Code Region - G20 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.89 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2067,43 +2067,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [20] Code Region - G21 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.89 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE-----R . 
add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2113,16 +2113,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 7. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [21] Code Region - G22 @@ -2131,25 +2131,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 2909 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.03 # CHECK-NEXT: IPC: 0.34 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE-----R. . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,3] . D=====eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==========eeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,5] . D==========eE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D===============eeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . D===============eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D====================eeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . . 
D====================eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2159,16 +2159,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 3. 1 6.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 5. 1 11.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 21.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.0 0.1 2.5 # CHECK: [22] Code Region - G23 @@ -2177,25 +2177,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3503 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 0.86 # CHECK-NEXT: IPC: 0.29 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . 
ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE-----R. . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,3] . D=====eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==========eeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D==========eE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D===============eeeeeeeER . . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . . D===============eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D====================eeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . D====================eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2205,16 +2205,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 9. 
1 26.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 3. 1 6.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 11.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 16.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 21.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 9. 1 21.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.0 0.1 2.5 # CHECK: [23] Code Region - G24 @@ -2223,25 +2223,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 2303 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.30 # CHECK-NEXT: IPC: 0.43 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 012345 # CHECK: [0,0] DeeeeeeeER. . . . ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,3] .D=======eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D=============eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D============eeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,7] . D=============eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============eeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: [0,9] . D=============eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE-----R. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D=====eeeeeeeER . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,3] . D=====eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==========eeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . 
D==========eE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D=========eeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,7] . . D=========eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========eeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . . D========eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2251,43 +2251,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 7. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 13.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 9.9 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 6.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 3. 1 6.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 11.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 10.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 7. 1 10.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 9.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 9. 
1 9.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 7.4 0.1 2.5 # CHECK: [24] Code Region - G25 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.89 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2297,43 +2297,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [25] Code Region - G26 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 509 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.89 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE-----R . 
add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2343,43 +2343,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 7. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [26] Code Region - G27 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 609 +# CHECK-NEXT: Total Cycles: 1108 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.25 -# CHECK-NEXT: IPC: 1.64 -# CHECK-NEXT: Block RThroughput: 5.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.89 +# CHECK-NEXT: IPC: 0.90 +# CHECK-NEXT: Block RThroughput: 10.7 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,9] . . 
DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2389,43 +2389,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 2.6 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.6 2.5 # CHECK: [27] Code Region - G28 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 759 +# CHECK-NEXT: Total Cycles: 1508 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.27 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.65 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 13.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE------R .. 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,5] . D==eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeER.. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,7] . D==eE------R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeeER . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,5] . . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeeER. . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,7] . . .DeE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,9] . . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2435,43 +2435,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 9. 
1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 3.0 +# CHECK-NEXT: 1. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 3. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 7. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.5 # CHECK: [28] Code Region - G29 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 759 +# CHECK-NEXT: Total Cycles: 1508 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.27 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.65 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 13.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeER.. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE------R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] . DeE-----R . . . 
add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeeER . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeeER. . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2481,43 +2481,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 3.0 +# CHECK-NEXT: 1. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.5 # CHECK: [29] Code Region - G30 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1909 +# CHECK-NEXT: Total Cycles: 1912 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.99 # CHECK-NEXT: IPC: 0.52 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 12.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234567 +# CHECK-NEXT: 0123456789 0 +# CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D========eeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,7] . D=========eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============eeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,9] . D===============eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeeER . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . D=====eeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,7] . . .D====eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . D=========eeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,9] . . . 
D========eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2527,16 +2527,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 7. 1 10.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 9. 1 16.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 6.1 0.2 2.8 +# CHECK-NEXT: 1. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 6.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 9. 1 9.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.6 0.6 2.5 # CHECK: [30] Code Region - G31 @@ -2545,25 +2545,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3503 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 0.29 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 11.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . 
ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] . DeE----R. . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] . D===eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .D========eeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,5] . . D=======eE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . D============eeeeeeeER . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,7] . . .D===========eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . D================eeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . D===============eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2573,16 +2573,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: 5. 1 8.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 7. 1 12.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 9. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 8.6 0.2 2.4 # CHECK: [31] Code Region - G32 @@ -2591,25 +2591,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3503 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 0.29 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 11.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 # CHECK: [0,0] DeeeeeeeER. . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D======eeeeeeeER . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,3] .D=======eE-----R . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D=============eE-----R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==================eeeeeeeER . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,7] . D===================eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D========================eeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=========================eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,1] . DeE----R. . . . . . . 
add x0, x27, #1 +# CHECK-NEXT: [0,2] . D====eeeeeeeER . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,3] . D===eE-----R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .D========eeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . D=======eE-----R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . D============eeeeeeeER . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,7] . . .D===========eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . D================eeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . D===============eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2619,43 +2619,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 3. 1 8.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 14.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 7. 1 20.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 26.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.5 0.1 2.5 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 3. 1 4.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 8.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 7. 1 12.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 8.6 0.2 2.4 # CHECK: [32] Code Region - G33 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 758 +# CHECK-NEXT: Total Cycles: 1507 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.62 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.32 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 11.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeER. . .. ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] . DeE----R. . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . .. ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeER .. ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,5] . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeER .. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,7] . . .DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: [0,9] . . . 
DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2665,43 +2665,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.5 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [33] Code Region - G34 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 758 +# CHECK-NEXT: Total Cycles: 1507 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.62 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.32 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 11.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: [0,1] D=eE-----R. . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeER. . .. ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: [0,1] . DeE----R. . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . .. ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,3] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,5] . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeER .. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2711,43 +2711,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 
1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.5 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [34] Code Region - G35 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 758 +# CHECK-NEXT: Total Cycles: 1507 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.62 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.32 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 11.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeER. . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeER. . .. ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE----R. . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . 
DeeeeeeeER . .. ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeeeER .. ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeER .. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2757,43 +2757,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.5 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [35] Code Region - G36 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 960 +# CHECK-NEXT: Total Cycles: 1709 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.69 -# CHECK-NEXT: IPC: 1.04 -# CHECK-NEXT: Block RThroughput: 9.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.63 +# CHECK-NEXT: IPC: 0.59 +# CHECK-NEXT: Block RThroughput: 15.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 012345 -# CHECK: [0,0] DeeeeeeeER. . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] . DeE--------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,5] . DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,7] . .DeE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE----R. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] . .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,5] . . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,9] . . . 
.DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2803,43 +2803,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 1.0 1.0 5.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.1 0.4 3.3 +# CHECK-NEXT: 9. 1 1.0 1.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.6 # CHECK: [36] Code Region - G37 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1009 +# CHECK-NEXT: Total Cycles: 1808 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.76 -# CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.65 +# CHECK-NEXT: IPC: 0.55 +# CHECK-NEXT: Block RThroughput: 16.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345 -# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . 
ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] . DeE--------R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,5] . DeE--------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .DeeeeeeeeeeER ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . . DeE--------R add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeeeER. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,5] . . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . .DeeeeeeeeeeER . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2849,43 +2849,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 5.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 5. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 6. 
1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.0 0.5 3.6 +# CHECK-NEXT: 9. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.8 # CHECK: [37] Code Region - G38 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1011 +# CHECK-NEXT: Total Cycles: 1809 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.75 -# CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.65 +# CHECK-NEXT: IPC: 0.55 +# CHECK-NEXT: Block RThroughput: 16.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 - -# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,3] . DeE--------R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .DeeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . . DeE--------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeeeeeeER . . .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE-----R . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeeER . .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . .DeE------R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeER . .. 
ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE-----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeeeeER .. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DeE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2895,16 +2895,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 5.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 1.0 1.0 6.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.0 0.5 3.6 +# CHECK-NEXT: 9. 1 1.0 1.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.8 # CHECK: [38] Code Region - G39 @@ -2913,25 +2913,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.25 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK-NEXT: Block RThroughput: 16.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . 
ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] . DeE----R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D====eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,3] . . D==eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . D========eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . .D======eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . D============eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,7] . . . D==========eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .D================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . . . D==============eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2941,16 +2941,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.0 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 5. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 9. 1 15.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 8.2 0.2 2.8 # CHECK: [39] Code Region - G40 @@ -2959,25 +2959,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 1.25 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK-NEXT: Block RThroughput: 16.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . 
ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] . DeE----R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D====eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . D==eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . D========eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,5] . . .D======eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . D============eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . . D==========eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .D================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . D==============eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2987,43 +2987,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 13.0 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 3. 
1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 5. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 13.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 9. 1 15.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 8.2 0.2 2.8 # CHECK: [40] Code Region - G41 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Total Cycles: 2106 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 2.38 -# CHECK-NEXT: IPC: 0.48 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.37 +# CHECK-NEXT: IPC: 0.47 +# CHECK-NEXT: Block RThroughput: 16.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 - -# CHECK: [0,0] DeeeeeeeeER . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] .DeE------R . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] . D======eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=====eeeeeeeeER. . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,5] . D=====eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .D======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,7] . . D======eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . . D=====eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: [0,9] . . D=====eE------R add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeeeeeeER . . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,1] . DeE----R . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . D====eeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . . 
D==eE------R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . D=eeeeeeeeER. .. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,5] . . .DeE-----R. .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeeeeeER .. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,7] . . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: [0,9] . . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3033,43 +3033,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 5. 1 6.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 7.0 2.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: 7. 1 7.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 6.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: 9. 1 6.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 5.4 0.3 3.0 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 1.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 9. 
1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.7 2.3 # CHECK: [41] Code Region - G42 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1009 +# CHECK-NEXT: Total Cycles: 2007 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.96 -# CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.49 +# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: Block RThroughput: 16.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeeeeeeER . . .. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] . DeE----R . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER. . .. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,3] . . DeE----R. . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeER . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,5] . . .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeeeeeER .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,7] . . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . 
.DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: [0,9] . . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3079,43 +3079,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.0 0.5 3.0 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [42] Code Region - G43 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1009 +# CHECK-NEXT: Total Cycles: 2007 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.96 -# CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 10.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.49 +# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: Block RThroughput: 16.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] . DeE------R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeeeeeeER . . .. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE----R . . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER. . .. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . . DeE----R. . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeER . .. ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeeeeeER .. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3125,43 +3125,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 7. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.0 0.5 3.0 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [43] Code Region - G44 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 808 +# CHECK-NEXT: Total Cycles: 1606 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.70 -# CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 8.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.37 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 12.7 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] . DeE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . DeE------R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeE-R. ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . .D==eeeeeER ldp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . .D===eE---R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeeER . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE----R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER. .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . . DeE----R. .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeeER .. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . . .DeE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeeER.. 
ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . . . DeE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeER ldp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3171,43 +3171,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 1.0 ldp s1, s2, [x27], #248 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ldp d1, d2, [x27], #496 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.8 0.5 2.6 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.8 1.8 # CHECK: [44] Code Region - G45 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.94 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ldp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D=eE-----R. . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeE-R. . ldp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER. . ldp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER ldp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] .D===eE-----R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeE--R ldp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . ldp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] .DeE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . ldp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] . . DeE-----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeE-R ldp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3217,43 +3217,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 1.0 ldp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 2.0 ldp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.4 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ldp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 
1 1.0 1.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.0 # CHECK: [45] Code Region - G46 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.94 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496 -# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER .. ldp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER .. ldp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] .D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeER ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . ldp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . ldp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeER . ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,7] . . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: [0,9] . . 
DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3263,16 +3263,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.2 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ldp w1, w2, [x27, #248]! +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ldp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.2 # CHECK: [46] Code Region - G47 @@ -3281,10 +3281,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 01 @@ -3292,14 +3292,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. ldr b1, [x27], #254 # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ldr h1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeER.. ldr s1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. 
ldr d1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ldr q1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER .. ldr h1, [x27], #254 +# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER.. ldr s1, [x27], #254 +# CHECK-NEXT: [0,5] . D=eE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,7] . D=eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeER ldr q1, [x27], #254 +# CHECK-NEXT: [0,9] . D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3310,15 +3310,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldr h1, [x27], #254 +# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 9. 
1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.5 # CHECK: [47] Code Region - G48 @@ -3327,10 +3327,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 01 @@ -3338,14 +3338,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. ldr b1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeER .. ldr h1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeER.. ldr s1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeER. ldr d1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ldr q1, [x27, #254]! -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeER .. ldr h1, [x27, #254]! +# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER.. ldr s1, [x27, #254]! +# CHECK-NEXT: [0,5] . D=eE---R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,7] . D=eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeER ldr q1, [x27, #254]! +# CHECK-NEXT: [0,9] . D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3356,15 +3356,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 4.0 0.0 0.0 ldr q1, [x27, #254]! -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldr h1, [x27, #254]! +# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldr s1, [x27, #254]! +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 9. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.5 # CHECK: [48] Code Region - G49 @@ -3373,10 +3373,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3384,14 +3384,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldr w1, [x27], #254 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . ldr x1, [x27], #254 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . ldr w1, [x27, #254]! +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. ldr x1, [x27, #254]! +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeER ldrb w1, [x27], #254 +# CHECK-NEXT: [0,9] . 
D=eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3402,15 +3402,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldr x1, [x27], #254 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldr w1, [x27, #254]! +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldr x1, [x27, #254]! +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrb w1, [x27], #254 +# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.0 # CHECK: [49] Code Region - G50 @@ -3419,10 +3419,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3430,14 +3430,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE--R. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . ldrh w1, [x27], #254 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . ldrh w1, [x27, #254]! +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. ldrsb w1, [x27], #254 +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeER ldrsb x1, [x27], #254 +# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3448,15 +3448,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrh w1, [x27], #254 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrh w1, [x27, #254]! +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldrsb w1, [x27], #254 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrsb x1, [x27], #254 +# CHECK-NEXT: 9. 
1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.0 # CHECK: [50] Code Region - G51 @@ -3465,10 +3465,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 8 +# CHECK: Dispatch Width: 3 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3476,14 +3476,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]! -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . ldrsb x1, [x27, #254]! +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . ldrsh w1, [x27], #254 +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. ldrsh x1, [x27], #254 +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeER ldrsh w1, [x27, #254]! +# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3494,41 +3494,41 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]! 
-# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrsb x1, [x27, #254]! +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrsh w1, [x27], #254 +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldrsh x1, [x27], #254 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldrsh w1, [x27, #254]! +# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.0 # CHECK: [51] Code Region - G52 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 1700 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.37 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.42 +# CHECK-NEXT: IPC: 1.42 +# CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eE--R add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeE-R st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eE-R add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . ldrsw x1, [x27], #254 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER. ldrsw x1, [x27, #254]! +# CHECK-NEXT: [0,5] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeE-R. st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,7] . DeE-R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
DeeER st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . .DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3539,41 +3539,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.1 0.8 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldrsw x1, [x27], #254 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldrsw x1, [x27, #254]! +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 1.0 st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: 7. 1 1.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.3 0.2 0.8 # CHECK: [52] Code Region - G53 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.97 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . 
st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3583,42 +3584,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 1.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [53] Code Region - G54 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.97 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3628,42 +3630,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [54] Code Region - G55 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2100 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.17 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.09 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3673,42 +3676,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [55] Code Region - G56 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2700 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.36 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.69 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3718,42 +3722,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [56] Code Region - G57 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.56 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.79 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 9.3 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
DeeER st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3763,42 +3768,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [57] Code Region - G58 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.56 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.79 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 9.3 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3808,43 +3814,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [58] Code Region - G59 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 755 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 3700 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.90 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.46 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 12.3 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,5] . D==eE-R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,7] . 
D==eE-R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeER . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] . DeER . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER. . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] . DeE-R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,5] . . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeER. . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,7] . . .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,9] . . . DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3854,43 +3860,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 0.7 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 3. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 5. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 7. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.2 # CHECK: [59] Code Region - G60 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 755 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.03 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.53 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 12.7 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeER . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] . DeER . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER. . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] . DeE-R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,5] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . 
DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3900,43 +3906,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 0.8 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.3 # CHECK: [60] Code Region - G61 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 755 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 3700 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.90 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.46 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 12.3 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . 
st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeER . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] . DeER . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeER . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3946,43 +3952,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 
1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 0.7 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.2 # CHECK: [61] Code Region - G62 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 3600 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.11 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 6.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.76 +# CHECK-NEXT: IPC: 0.77 +# CHECK-NEXT: Block RThroughput: 12.0 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeE-R . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eE-R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeER st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eE---R add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeE--R st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] . 
DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,5] . . DeE-R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . .DeeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,9] . . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3992,43 +3998,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 1.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 2.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.2 0.3 1.1 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 0.2 # CHECK: [62] Code Region - G63 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 804 +# CHECK-NEXT: Total Cycles: 1603 # CHECK-NEXT: Total uOps: 4200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.22 -# CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 8.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.62 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 14.0 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] .DeE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeE--R .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeER. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeER st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,7] . DeE---R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] . DeE-R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] . DeER . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,5] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeER. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,7] . . . DeE-R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . 
DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4038,43 +4044,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 2.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.8 0.4 1.3 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.8 0.3 # CHECK: [63] Code Region - G64 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 705 +# CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.39 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 7.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.71 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 12.7 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. 
st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeE--R .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeER .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eER .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeER st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE---R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeE--R st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE-R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeER .. st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeER .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE-R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4084,43 +4090,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 2.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 2.0 0.0 2.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.2 1.4 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 0.2 # CHECK: [64] Code Region - G65 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 706 +# CHECK-NEXT: Total Cycles: 1405 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.53 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 5.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.28 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 10.7 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeER. . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,5] . D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER. st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE-R . . . 
add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeER. . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,5] . . DeE--R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeER . st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,7] . . .DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4130,43 +4136,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.1 0.3 1.2 +# CHECK-NEXT: 3. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 0.8 # CHECK: [65] Code Region - G66 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER . st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4176,43 +4182,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.0 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.0 # CHECK: [66] Code Region - G67 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.34 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.19 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 7.3 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER .. st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER .. st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,5] .D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER.. st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
D==eeeeeER st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4222,43 +4228,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.1 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 9. 
1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.1 # CHECK: [67] Code Region - G68 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.73 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.39 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER.. st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE---R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,7] . D==eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeER st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4268,43 +4274,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 1. 
1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.1 1.2 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 9. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.2 # CHECK: [68] Code Region - G69 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.13 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.58 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.7 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER .. st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeER .. st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER.. 
st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeER st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeER st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4314,43 +4320,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.1 1.3 +# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.3 # CHECK: [69] Code Region - G70 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.74 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.39 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeER. st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeER. st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeER st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4360,43 +4366,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 1. 
1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.1 1.2 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.2 # CHECK: [70] Code Region - G71 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. 
st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4406,43 +4412,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.0 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.0 # CHECK: [71] Code Region - G72 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4452,39 +4458,39 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 1. 
1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.0 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.0 # CHECK: [72] Code Region - G73 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 407 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.91 -# CHECK-NEXT: IPC: 1.47 -# CHECK-NEXT: Block RThroughput: 3.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.83 +# CHECK-NEXT: IPC: 0.85 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,5] . D==eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . 
st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,1] . DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,5] . .DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4494,39 +4500,39 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.8 0.3 1.7 +# CHECK-NEXT: 1. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 5. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 1.5 # CHECK: [73] Code Region - G74 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 708 +# CHECK-NEXT: Total Cycles: 1406 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.37 -# CHECK-NEXT: IPC: 1.41 -# CHECK-NEXT: Block RThroughput: 7.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.70 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 12.7 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE---R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] . DeE---R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,5] . . DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,7] . . DeE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . .DeeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . . DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4536,43 +4542,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 1.9 +# CHECK-NEXT: 1. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 
1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 7. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.9 1.5 # CHECK: [74] Code Region - G75 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 707 +# CHECK-NEXT: Total Cycles: 1206 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.81 -# CHECK-NEXT: IPC: 1.41 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.82 +# CHECK-NEXT: IPC: 0.83 +# CHECK-NEXT: Block RThroughput: 11.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeER . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeER . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE---R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER. . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE---R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . .DeE---R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeER. 
. st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE---R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . .DeE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4582,43 +4588,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.2 1.7 +# CHECK-NEXT: 1. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 1.5 # CHECK: [75] Code Region - G76 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 755 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.30 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.66 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 13.3 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeE-R . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE---R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER. . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,3] . DeE-R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeER . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,5] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeER . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE-R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . . . 
DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4628,43 +4634,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 1.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 1.4 +# CHECK-NEXT: 1. 1 1.0 1.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 3. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.7 # CHECK: [76] Code Region - G77 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 755 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.30 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.66 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 13.3 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,1] D=eE--R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D==eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] . D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] . DeE-R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER. . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,3] . DeE-R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . DeE-R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE-R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,9] . . . DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4674,43 +4680,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 1.0 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 2. 
1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 3. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 9. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.5 # CHECK: [77] Code Region - G78 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 808 +# CHECK-NEXT: Total Cycles: 1607 # CHECK-NEXT: Total uOps: 4200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.20 -# CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 8.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.61 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 14.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeER . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D==eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,7] . D=eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DeeeeER . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] . DeE-R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,3] . DeE--R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . 
.DeeeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . . DeE--R . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,7] . . . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,9] . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4720,43 +4726,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.0 0.3 1.7 +# CHECK-NEXT: 1. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 3. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 9. 
1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 1.1 # CHECK: [78] Code Region - G79 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1207 +# CHECK-NEXT: Total Cycles: 2107 # CHECK-NEXT: Total uOps: 5800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.81 -# CHECK-NEXT: IPC: 0.83 -# CHECK-NEXT: Block RThroughput: 12.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.75 +# CHECK-NEXT: IPC: 0.47 +# CHECK-NEXT: Block RThroughput: 19.3 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01234567 -# CHECK: [0,0] DeeeeeeeER. . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] . DeE-------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,5] . D=eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,7] . D==eE-------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . .D=eeeeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,9] . . D=eE-------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] . DeE----R. . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER. . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] . . DeE----R. . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeER . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,5] . . DeE----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . .DeeeeeeeeeER . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,7] . . . DeE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . 
.DeeeeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,9] . . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4766,43 +4772,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 7. 1 3.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 9. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.8 0.4 3.1 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 2.0 # CHECK: [79] Code Region - G80 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1007 +# CHECK-NEXT: Total Cycles: 1807 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.77 -# CHECK-NEXT: IPC: 0.99 -# CHECK-NEXT: Block RThroughput: 9.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.66 +# CHECK-NEXT: IPC: 0.55 +# CHECK-NEXT: Block RThroughput: 16.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeER . 
.. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeER .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . D=eE-----R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE-----R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeeER st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-------R add x0, x27, #1 -# CHECK-NEXT: [0,8] . .D=eeeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . .D==eE-----R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01234 + +# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE--R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . .DeE----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeeeER . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . . DeE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeeeeeER . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . . DeE----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . . . DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4812,43 +4818,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 
1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.4 2.6 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 1.8 # CHECK: [80] Code Region - G81 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1057 +# CHECK-NEXT: Total Cycles: 1905 # CHECK-NEXT: Total uOps: 5200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.92 -# CHECK-NEXT: IPC: 0.95 -# CHECK-NEXT: Block RThroughput: 10.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.73 +# CHECK-NEXT: IPC: 0.52 +# CHECK-NEXT: Block RThroughput: 17.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123 -# CHECK: [0,0] DeeeeeeeeeER .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] .DeE-------R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] . DeeeeeeeeeER .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . DeE-------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeER .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,5] . D===eE---R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D===eeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,7] . D====eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
.D===eeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . .D====eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeeER . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DeE----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . . DeE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . . DeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,5] . . . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . . DeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,7] . . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . .DeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4858,43 +4864,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1. 1 1.0 1.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 3.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 5. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.6 2.3 +# CHECK-NEXT: 3. 1 1.0 1.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 5. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 7. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 1.4 # CHECK: [81] Code Region - G82 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 756 +# CHECK-NEXT: Total Cycles: 1505 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.29 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.66 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 13.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,3] .D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,5] . D==eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D==eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] . DeE--R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,5] . . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . . DeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . . . 
DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4904,41 +4910,41 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 1.5 +# CHECK-NEXT: 1. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 3. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 5. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 1.0 # CHECK: [82] Code Region - G83 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total Cycles: 1204 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 5.29 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.66 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 10.7 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . 
st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,5] . D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeeER . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . .DeeeeER . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,5] . . DeE-R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . .DeE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4948,34 +4954,34 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.0 0.3 1.3 +# CHECK-NEXT: 1. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 1.0 1.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 5. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 6. 
1 1.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 1.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 1.0 0.8 # CHECK: [83] Code Region - G84 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 204 +# CHECK-NEXT: Total Cycles: 403 # CHECK-NEXT: Total uOps: 1000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.90 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 1.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.48 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345 +# CHECK-NEXT: Index 0123456 -# CHECK: [0,0] DeeER. stp s1, s2, [x27], #248 -# CHECK-NEXT: [0,1] D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER stp d1, d2, [x27], #496 -# CHECK-NEXT: [0,3] .D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER.. stp s1, s2, [x27], #248 +# CHECK-NEXT: [0,1] .DeER.. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER stp d1, d2, [x27], #496 +# CHECK-NEXT: [0,3] . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4985,36 +4991,37 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp d1, d2, [x27], #496 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.3 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 stp d1, d2, [x27], #496 +# CHECK-NEXT: 3. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [84] Code Region - G85 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 3100 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.41 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 6.5 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.58 +# CHECK-NEXT: IPC: 0.83 +# CHECK-NEXT: Block RThroughput: 10.3 # CHECK: Timeline view: +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . stp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D=eE-R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . stp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . stp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeER stp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] . D==eE-R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eE-R stp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 +# CHECK: [0,0] DeeeER . . stp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . stp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER. . stp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] . .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . . DeeeER . stp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeER. stp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . . .DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5024,42 +5031,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 1.0 0.0 0.0 stp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 0.0 stp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 1.0 stp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.2 0.3 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 stp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 stp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 stp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 1.0 1.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 stp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.7 0.0 # CHECK: [85] Code Region - G86 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2300 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.56 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.29 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.7 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeER. . stp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeER . stp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeER. str b1, [x27], #254 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeER str h1, [x27], #254 -# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeER . . . stp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeER . . 
stp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . . stp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . str b1, [x27], #254 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER str h1, [x27], #254 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5069,42 +5077,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp w1, w2, [x27, #248]! -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str b1, [x27], #254 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 str h1, [x27], #254 -# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.8 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 stp w1, w2, [x27, #248]! +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 stp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str b1, [x27], #254 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str h1, [x27], #254 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [86] Code Region - G87 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.37 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.19 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.3 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . str s1, [x27], #254 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeER. str b1, [x27, #254]! -# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER str h1, [x27, #254]! -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . str s1, [x27], #254 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . str d1, [x27], #254 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . str q1, [x27], #254 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . str b1, [x27, #254]! +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER str h1, [x27, #254]! +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5114,42 +5123,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 2.0 0.0 0.0 str q1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str b1, [x27, #254]! -# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27, #254]! -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str d1, [x27], #254 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 str q1, [x27], #254 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str b1, [x27, #254]! +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str h1, [x27, #254]! +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [87] Code Region - G88 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.37 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.19 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.3 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeeER. . str s1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eER . str w1, [x27], #254 -# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. str x1, [x27], #254 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . str s1, [x27, #254]! +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . str d1, [x27, #254]! 
+# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . str q1, [x27, #254]! +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeER. . str w1, [x27], #254 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeER. str x1, [x27], #254 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5159,42 +5169,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]! -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 str w1, [x27], #254 -# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str x1, [x27], #254 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str d1, [x27, #254]! +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 str q1, [x27, #254]! +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 str w1, [x27], #254 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 str x1, [x27], #254 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [88] Code Region - G89 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.97 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 - -# CHECK: [0,0] DeER . . str w1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeER . . . str w1, [x27, #254]! +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeER . . str x1, [x27, #254]! +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . . strb w1, [x27], #254 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeER. . strb w1, [x27, #254]! +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeER. strh w1, [x27], #254 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5204,34 +5215,34 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]! -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 2.0 0.0 0.0 strb w1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 str x1, [x27, #254]! +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 strb w1, [x27], #254 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 strb w1, [x27, #254]! +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 strh w1, [x27], #254 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [89] Code Region - G90 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 104 +# CHECK-NEXT: Total Cycles: 203 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 3.85 -# CHECK-NEXT: IPC: 1.92 -# CHECK-NEXT: Block RThroughput: 0.7 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 1.3 # CHECK: Timeline view: # CHECK-NEXT: Index 01234 # CHECK: [0,0] DeER. strh w1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eER add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5241,20 +5252,20 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]! -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.5 0.0 +# CHECK-NEXT: 1. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [90] Code Region - G91 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 142 +# CHECK-NEXT: Total Cycles: 209 # CHECK-NEXT: Total uOps: 600 -# CHECK: Dispatch Width: 8 -# CHECK-NEXT: uOps Per Cycle: 4.23 -# CHECK-NEXT: IPC: 2.82 -# CHECK-NEXT: Block RThroughput: 1.3 +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 2.87 +# CHECK-NEXT: IPC: 1.91 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -5262,8 +5273,8 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldr x1, [x27], #254 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254 -# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D===eeeeER ldr x2, [x1], #254 +# CHECK-NEXT: [0,3] .DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5274,6 +5285,6 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.3 2.0 +# CHECK-NEXT: 2. 1 4.0 0.0 0.0 ldr x2, [x1], #254 +# CHECK-NEXT: 3. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.0 0.3 2.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s index 46bea36d38eb8..d105b8b8f69a1 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s @@ -1185,10 +1185,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1197,13 +1197,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1215,14 +1215,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 3. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [1] Code Region - G02 @@ -1231,10 +1231,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1243,13 +1243,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . D==eE----R. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1261,14 +1261,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [2] Code Region - G03 @@ -1277,10 +1277,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1289,13 +1289,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. 
ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1307,14 +1307,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 9. 
1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [3] Code Region - G04 @@ -1323,10 +1323,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1900 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.74 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1334,14 +1334,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1352,15 +1352,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [4] Code Region - G05 @@ -1369,10 +1369,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1380,14 +1380,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE----R . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1398,15 +1398,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [5] Code Region - G06 @@ -1415,10 +1415,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1426,14 +1426,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1444,15 +1444,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 
1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [6] Code Region - G07 @@ -1461,10 +1461,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2300 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.53 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 4.3 +# CHECK-NEXT: Block RThroughput: 4.6 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1472,14 +1472,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1490,15 +1490,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 # CHECK-NEXT: 1. 
1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [7] Code Region - G08 @@ -1507,7 +1507,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1518,14 +1518,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,9] . 
D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1536,15 +1536,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 9. 
1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [8] Code Region - G09 @@ -1553,7 +1553,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1564,14 +1564,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1582,42 +1582,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [9] Code Region - G10 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 608 +# CHECK-NEXT: Total Cycles: 708 # CHECK-NEXT: Total uOps: 2700 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.44 -# CHECK-NEXT: IPC: 1.64 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.81 +# CHECK-NEXT: IPC: 1.41 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,7] . D==eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,9] . D===eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . 
ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,7] . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,9] . .DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1628,42 +1628,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 9. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.6 0.1 2.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 9. 
1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.3 0.2 2.2 # CHECK: [10] Code Region - G11 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 675 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.44 -# CHECK-NEXT: IPC: 1.48 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,7] . D==eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,9] . . 
DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1673,43 +1673,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.2 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [11] Code Region - G12 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 675 +# CHECK-NEXT: Total Cycles: 1008 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.44 -# CHECK-NEXT: IPC: 1.48 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . 
ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,1] .DeE-----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1719,43 +1719,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 7. 
1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.2 2.5 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.5 # CHECK: [12] Code Region - G13 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1210 +# CHECK-NEXT: Total Cycles: 1212 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.31 # CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 - -# CHECK: [0,0] DeeeeeeeER. . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE-----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE-----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,9] . D=========eE------R add x0, x27, #1 +# CHECK-NEXT: Index 0123456789 0123 + +# CHECK: [0,0] DeeeeeeeER. . . . 
ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE-----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D======eeeeeeeeER ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,9] . . D=======eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1765,16 +1765,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 9. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.2 2.6 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 7.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 9. 
1 8.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.4 2.6 # CHECK: [13] Code Region - G14 @@ -1783,10 +1783,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -1794,14 +1794,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,9] . 
D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1812,15 +1812,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [14] Code Region - G15 @@ -1829,10 +1829,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -1840,14 +1840,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . 
add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1858,15 +1858,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 3. 
1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [15] Code Region - G16 @@ -1875,10 +1875,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 1600 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.60 # CHECK-NEXT: IPC: 1.00 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.2 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1886,14 +1886,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeE-R . ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,3] D==eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeE-R . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeER ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1904,15 +1904,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 1.0 ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.4 +# CHECK-NEXT: 2. 1 1.0 0.0 1.0 ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.0 0.1 2.4 # CHECK: [16] Code Region - G17 @@ -1921,10 +1921,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1933,13 +1933,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1r { v1.4s }, [x27], #4 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . 
add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1951,14 +1951,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [17] Code Region - G18 @@ -1967,10 +1967,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -1979,13 +1979,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1r { v1.2d }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . 
ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1997,14 +1997,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 9. 
1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [18] Code Region - G19 @@ -2013,10 +2013,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 1900 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.73 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.8 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2025,13 +2025,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1r { v1.8h }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . D==eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2043,14 +2043,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 
1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 2.6 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.1 0.1 2.6 # CHECK: [19] Code Region - G20 @@ -2059,10 +2059,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.71 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 4.8 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2070,14 +2070,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. 
ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2088,15 +2088,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [20] Code Region - G21 @@ -2105,10 +2105,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.31 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2116,14 +2116,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . 
ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2134,15 +2134,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 7. 
1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [21] Code Region - G22 @@ -2151,10 +2151,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3310 # CHECK-NEXT: Total uOps: 2100 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 0.63 # CHECK-NEXT: IPC: 0.30 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.2 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2162,14 +2162,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
D============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2180,15 +2180,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [22] Code Region - G23 @@ -2197,10 +2197,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2208,14 +2208,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2226,15 +2226,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 31.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [23] Code Region - G24 @@ -2243,10 +2243,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 2403 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 0.83 # CHECK-NEXT: IPC: 0.42 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 @@ -2254,14 +2254,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . .. ld2 { v1.s, v2.s }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . .. ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,3] D=========eE------R . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D================eE------R add x0, x27, #1 -# CHECK-NEXT: [0,6] .D================eeeeeeE-R ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,7] .D=================eE-----R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D================eeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: [0,9] . D=================eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . .. ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,3] .D========eE------R . .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . 
D===============eE------R add x0, x27, #1 +# CHECK-NEXT: [0,6] . D==============eeeeeeE-R ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,7] . D===============eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============eeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . D===============eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2272,15 +2272,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 17.0 0.0 1.0 ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 7. 1 18.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 18.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 12.5 0.1 2.8 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 15.0 0.0 1.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 7. 1 16.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 9. 
1 16.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.3 0.1 2.8 # CHECK: [24] Code Region - G25 @@ -2289,10 +2289,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -2300,14 +2300,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld2r { v1.2s, v2.2s }, [x27], #8 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2318,15 +2318,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: 5. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [25] Code Region - G26 @@ -2335,10 +2335,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -2346,14 +2346,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld2r { v1.16b, v2.16b }, [x27], #2 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2364,27 +2364,27 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 9. 
1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [26] Code Region - G27 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 512 +# CHECK-NEXT: Total Cycles: 611 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.69 -# CHECK-NEXT: IPC: 1.95 -# CHECK-NEXT: Block RThroughput: 3.7 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.93 +# CHECK-NEXT: IPC: 1.64 +# CHECK-NEXT: Block RThroughput: 4.8 # CHECK: Timeline view: # CHECK-NEXT: 0123456 @@ -2392,14 +2392,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . .. ld2r { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. .. ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER .. ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER .. ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE--------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. .. ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER .. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,9] . DeE--------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2410,42 +2410,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.4 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.4 0.1 2.4 # CHECK: [27] Code Region - G28 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 761 +# CHECK-NEXT: Total Cycles: 1011 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.26 -# CHECK-NEXT: IPC: 1.31 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.96 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeER . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,5] . D==eE--------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeE-R . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,7] . 
D==eE-------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE--------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeeeeeeeeER . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,5] . DeE--------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2455,43 +2455,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 1.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 7. 1 3.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 3.6 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 6. 
1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.4 # CHECK: [28] Code Region - G29 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 761 +# CHECK-NEXT: Total Cycles: 1011 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.26 -# CHECK-NEXT: IPC: 1.31 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.96 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 -# CHECK-NEXT: Index 0123456789 - -# CHECK: [0,0] DeeeeeeeeeeER . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,1] D=eE--------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeeER . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeER. . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D==eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE--------R add x0, x27, #1 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeeeeeeeeeeER . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] .DeE--------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE--------R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . 
ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2501,43 +2501,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 1. 1 2.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 3.6 +# CHECK-NEXT: 1. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.6 # CHECK: [29] Code Region - G30 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2210 +# CHECK-NEXT: Total Cycles: 2211 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.81 # CHECK-NEXT: IPC: 0.45 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01 +# CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . .. ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeeER . . . .. ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--------R . . . .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeeeER . . .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE--------R . . .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==========eeeeeeeeER . .. ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,7] . D===========eE------R . .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,9] . D==================eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeeER. . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE--------R. . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE--------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D========eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,7] . . D========eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . 
D==============eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,9] . . D==============eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2547,16 +2547,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 8.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 11.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 7. 1 12.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 7.1 0.2 3.4 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 8.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 9. 1 15.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 5.4 0.3 3.4 # CHECK: [30] Code Region - G31 @@ -2565,25 +2565,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . 
ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2593,16 +2593,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [31] Code Region - G32 @@ -2611,25 +2611,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.00 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,7] . 
D======================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2639,16 +2639,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 
1 13.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [32] Code Region - G33 @@ -2657,7 +2657,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -2668,14 +2668,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2686,15 +2686,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [33] Code Region - G34 @@ -2703,7 +2703,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -2714,14 +2714,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
D==eeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2732,15 +2732,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 
1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [34] Code Region - G35 @@ -2749,7 +2749,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -2760,14 +2760,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2778,42 +2778,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [35] Code Region - G36 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 809 +# CHECK-NEXT: Total Cycles: 909 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.56 -# CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 8.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 4.95 +# CHECK-NEXT: IPC: 1.10 +# CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeeeER. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,7] . D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
D==eeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE----R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,7] . .DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2825,23 +2825,23 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 2.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 2.8 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.1 0.4 2.8 # CHECK: [36] Code Region - G37 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1008 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.96 # CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 10.0 @@ -2851,15 +2851,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeeeER. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,5] . D===eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeeeER . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2869,25 +2869,25 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.5 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [37] Code Region - G38 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1008 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.96 # CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 10.0 @@ -2897,15 +2897,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeeeER. . 
ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . D===eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2915,16 +2915,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 2.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.5 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 3. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [38] Code Region - G39 @@ -2933,7 +2933,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 10.0 @@ -2943,15 +2943,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . 
ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2961,16 +2961,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 9. 
1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [39] Code Region - G40 @@ -2979,7 +2979,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 5000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 1.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 10.0 @@ -2989,15 +2989,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . 
D========================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3007,16 +3007,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [40] Code Region - G41 @@ -3025,25 +3025,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 2103 # CHECK-NEXT: Total uOps: 4900 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.33 # CHECK-NEXT: IPC: 0.48 -# CHECK-NEXT: Block RThroughput: 9.5 +# CHECK-NEXT: Block RThroughput: 9.8 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 0123 # CHECK: [0,0] DeeeeeeeeER . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] D=eE------R . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D========eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=======eeeeeeeeER. . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,5] . D========eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=========eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,7] . D==========eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=========eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: [0,9] . D==========eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D======eE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=====eeeeeeeeER. . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,5] . D=====eE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,7] . . D======eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D=====eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: [0,9] . . 
D=====eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3053,43 +3053,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 5. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 10.0 2.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: 9. 1 11.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 7.9 0.3 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 5. 1 6.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 7.0 2.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 7. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 6.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 9. 
1 6.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 5.4 0.3 3.0 # CHECK: [41] Code Region - G42 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 759 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.93 -# CHECK-NEXT: IPC: 1.32 -# CHECK-NEXT: Block RThroughput: 7.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 4.46 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 9.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER .. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER .. ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,3] .D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,5] . D==eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeeeER.. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,7] . D==eE------R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: [0,9] . . 
DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3099,43 +3099,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [42] Code Region - G43 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 859 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 4700 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.47 -# CHECK-NEXT: IPC: 1.16 -# CHECK-NEXT: Block RThroughput: 8.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 4.66 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 9.4 # CHECK: Timeline view: -# CHECK-NEXT: 01234567 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D===eE------R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D====eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3145,43 +3145,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.4 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [43] Code Region - G44 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 608 +# CHECK-NEXT: Total Cycles: 1007 # CHECK-NEXT: Total uOps: 3900 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 6.41 -# CHECK-NEXT: IPC: 1.64 -# CHECK-NEXT: Block RThroughput: 5.3 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.87 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 7.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 0123456 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D==eE------R add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeE-R ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] . D==eE-----R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeER ldp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER.. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER.. ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . . DeE----R.. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeER ldp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . . 
DeE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3191,43 +3191,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ldp d1, d2, [x27], #496 -# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.1 0.2 2.8 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 9. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 2.6 # CHECK: [44] Code Region - G45 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total Cycles: 807 # CHECK-NEXT: Total uOps: 2700 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.33 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 4.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.35 +# CHECK-NEXT: IPC: 1.24 +# CHECK-NEXT: Block RThroughput: 5.4 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D=eE----R .. 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER.. ldp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] .D=eE----R.. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeER. ldp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] . D=eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeER ldp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] . D=eE----R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeE-R ldp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeER . . ldp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] .DeE----R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER . ldp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] . DeE----R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] . DeE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] . . DeE----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeE-R ldp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . . D=eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3237,16 +3237,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 1.0 ldp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.1 2.0 +# CHECK-NEXT: 1. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ldp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 
1 1.0 1.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 1.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.1 0.4 2.0 # CHECK: [45] Code Region - G46 @@ -3255,10 +3255,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2100 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.15 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 4.2 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3266,14 +3266,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldp x1, x2, [x27], #496 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . ldp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . ldp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeER ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3284,15 +3284,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! 
-# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ldp w1, w2, [x27, #248]! +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ldp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.0 # CHECK: [46] Code Region - G47 @@ -3301,10 +3301,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -3313,13 +3313,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27], #254 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ldr q1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ldr s1, [x27], #254 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
D==eeeeeeER ldr q1, [x27], #254 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3331,14 +3331,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [47] Code Region - G48 @@ -3347,10 +3347,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 012 @@ -3359,13 +3359,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ldr q1, [x27, #254]! 
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ldr s1, [x27, #254]! +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ldr q1, [x27, #254]! +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3377,14 +3377,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27, #254]! -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldr s1, [x27, #254]! +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [48] Code Region - G49 @@ -3393,10 +3393,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3405,13 +3405,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldr w1, [x27], #254 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . 
ldr x1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . ldr w1, [x27, #254]! +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. ldr x1, [x27, #254]! +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER ldrb w1, [x27], #254 +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3423,14 +3423,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 1.0 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldr w1, [x27, #254]! +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ldr x1, [x27, #254]! +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldrb w1, [x27], #254 +# CHECK-NEXT: 9. 
1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 1.0 # CHECK: [49] Code Region - G50 @@ -3439,10 +3439,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3451,13 +3451,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . ldrh w1, [x27, #254]! +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. ldrsb w1, [x27], #254 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER ldrsb x1, [x27], #254 +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3469,14 +3469,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254 -# CHECK-NEXT: 9. 
1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 1.0 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldrh w1, [x27, #254]! +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ldrsb w1, [x27], #254 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldrsb x1, [x27], #254 +# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 1.0 # CHECK: [50] Code Region - G51 @@ -3485,10 +3485,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -3497,13 +3497,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]! -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . ldrsh w1, [x27], #254 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. ldrsh x1, [x27], #254 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER ldrsh w1, [x27, #254]! +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3515,14 +3515,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]! 
-# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]! -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 1.0 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldrsh w1, [x27], #254 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ldrsh x1, [x27], #254 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldrsh w1, [x27, #254]! +# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 1.0 # CHECK: [51] Code Region - G52 @@ -3531,10 +3531,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1700 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.37 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.4 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 @@ -3542,13 +3542,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254 -# CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeE-R st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eE-R add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER ldrsw x1, [x27, #254]! +# CHECK-NEXT: [0,5] .D==eE--R add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeE-R st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,7] . D==eE-R add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
D=eeER st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3560,14 +3560,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 0.8 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldrsw x1, [x27, #254]! +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 1.0 st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: 7. 1 3.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.2 0.1 0.8 # CHECK: [52] Code Region - G53 @@ -3576,24 +3576,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . 
D==eeER st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3604,15 +3604,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 9. 
1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [53] Code Region - G54 @@ -3621,24 +3621,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3649,15 +3649,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 3.0 0.0 0.0 st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [54] Code Region - G55 @@ -3666,24 +3666,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,9] . 
D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3694,15 +3694,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [55] Code Region - G56 @@ -3711,24 +3711,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eER. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3739,15 +3739,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 9. 
1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [56] Code Region - G57 @@ -3756,24 +3756,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3784,15 +3784,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 7. 
1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [57] Code Region - G58 @@ -3801,24 +3801,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . 
D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3829,41 +3829,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [58] Code Region - G59 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . 
st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3873,42 +3874,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 
1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [59] Code Region - G60 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . 
add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3918,42 +3920,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [60] Code Region - G61 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3963,42 +3966,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [61] Code Region - G62 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . 
st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4008,42 +4012,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 
1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [62] Code Region - G63 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . 
st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4053,42 +4058,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [63] Code Region - G64 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4098,42 +4104,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [64] Code Region - G65 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 604 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.76 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.66 +# CHECK-NEXT: Block RThroughput: 4.8 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . 
st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeER. st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4143,16 +4149,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 
1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.3 0.2 0.0 # CHECK: [65] Code Region - G66 @@ -4161,24 +4167,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.b }[8], [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4189,15 +4195,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [66] Code Region - G67 @@ -4206,24 +4212,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st1 { v1.s }[0], [x27], #4 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . 
add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4234,15 +4240,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [67] Code Region - G68 @@ -4251,24 +4257,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st2 { v1.2s, v2.2s }, [x27], #16 # CHECK-NEXT: [0,1] D=eER. . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4279,15 +4285,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [68] Code Region - G69 @@ -4296,24 +4302,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st2 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4324,15 +4330,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 
1 3.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [69] Code Region - G70 @@ -4341,24 +4347,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st2 { v1.8b, v2.8b }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
DeeER st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4369,15 +4375,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [70] Code Region - G71 @@ -4386,24 +4392,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st2 { v1.b, v2.b }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . 
st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4414,15 +4420,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [71] Code Region - G72 @@ -4431,24 +4437,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . st2 { v1.h, v2.h }[4], [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4459,42 +4465,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 
1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [72] Code Region - G73 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 606 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.62 -# CHECK-NEXT: IPC: 1.65 -# CHECK-NEXT: Block RThroughput: 4.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 1.42 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . .. st2g x26, [x27], #4064 -# CHECK-NEXT: [0,1] D=eER. .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. .. st2g x26, [x27, #4064]! -# CHECK-NEXT: [0,3] D==eER .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER .. st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,5] .D==eER .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER.. st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,7] . D==eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064 +# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeER. . . st2g x26, [x27, #4064]! 
+# CHECK-NEXT: [0,3] .D=eER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,7] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . .DeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4505,42 +4511,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.2 0.4 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2g x26, [x27, #4064]! +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 9. 
1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.2 0.3 0.4 # CHECK: [73] Code Region - G74 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.40 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 7.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.79 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.6 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeER. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,5] . D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,7] . . DeE--R add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4550,43 +4556,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.3 0.8 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.8 # CHECK: [74] Code Region - G75 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 705 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 4.82 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.38 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.8 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER .. st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeER .. 
st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeER.. st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4596,42 +4602,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.2 1.0 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 
1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.0 # CHECK: [75] Code Region - G76 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 604 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.30 -# CHECK-NEXT: IPC: 1.66 -# CHECK-NEXT: Block RThroughput: 5.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.19 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.4 # CHECK: Timeline view: +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeE-R . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,3] .D=eE-R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeER . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,5] . D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4641,42 +4648,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 1.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.1 0.2 0.4 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.2 # CHECK: [76] Code Region - G77 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,1] D=eER. . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4686,43 +4694,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 3. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [77] Code Region - G78 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.93 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,7] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . 
st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4732,43 +4740,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.2 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 9. 
1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.2 # CHECK: [78] Code Region - G79 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 804 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 4200 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.22 -# CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 8.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 4.18 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 8.4 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,5] . D=eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,9] . . 
DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4778,43 +4786,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.9 0.2 1.0 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 1.0 # CHECK: [79] Code Region - G80 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.62 -# CHECK-NEXT: IPC: 1.65 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.38 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.8 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . 
st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeER . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE--R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4824,43 +4832,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 
1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.8 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.8 # CHECK: [80] Code Region - G81 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.40 -# CHECK-NEXT: IPC: 1.42 -# CHECK-NEXT: Block RThroughput: 7.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.79 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 7.6 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==eeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,5] . D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,7] . D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeER . . 
st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4870,42 +4878,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 2.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.3 0.4 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.4 # CHECK: [81] Code Region - G82 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.95 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 2.99 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 +# CHECK: [0,0] DeeER. . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,5] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . DeER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4915,42 +4924,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 0.0 # CHECK: [82] Code Region - G83 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.56 -# CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 4.5 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 3.48 +# CHECK-NEXT: IPC: 1.24 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . 
st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeER . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eER. stg x26, [x27], #4064 -# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 +# CHECK: [0,0] DeeER. .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] .DeER. .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] . DeER .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,5] . DeER .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . . DeER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeER. stg x26, [x27], #4064 +# CHECK-NEXT: [0,9] . . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4960,16 +4970,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 0.0 0.0 stg x26, [x27], #4064 -# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.1 0.0 +# CHECK-NEXT: 1. 
1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 stg x26, [x27], #4064 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.1 0.4 0.0 # CHECK: [83] Code Region - G84 @@ -4978,24 +4988,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.37 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeER . . stg x26, [x27, #4064]! # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . stgp x1, x2, [x27], #992 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eER . stgp x1, x2, [x27, #992]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. stp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER stp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeER. . stgp x1, x2, [x27], #992 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . stgp x1, x2, [x27, #992]! +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. stp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER stp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5006,15 +5016,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 
1 1.0 1.0 0.0 stg x26, [x27, #4064]! # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stgp x1, x2, [x27], #992 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stgp x1, x2, [x27, #992]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp s1, s2, [x27], #248 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stp d1, d2, [x27], #496 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stgp x1, x2, [x27], #992 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stgp x1, x2, [x27, #992]! +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 stp s1, s2, [x27], #248 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 stp d1, d2, [x27], #496 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [84] Code Region - G85 @@ -5023,24 +5033,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.76 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.5 +# CHECK-NEXT: Block RThroughput: 4.8 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . stp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . stp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. stp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. stp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . stp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . 
stp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. stp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeER. stp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5051,15 +5061,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp s1, s2, [x27, #248]! +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 stp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 stp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [85] Code Region - G86 @@ -5068,24 +5078,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.37 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eER . 
add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eER . stp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. str b1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeER str h1, [x27], #254 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeER. . stp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . stp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. str b1, [x27], #254 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER str h1, [x27], #254 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5096,15 +5106,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27], #254 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stp w1, w2, [x27, #248]! +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 str b1, [x27], #254 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 str h1, [x27], #254 +# CHECK-NEXT: 9. 
1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [86] Code Region - G87 @@ -5113,24 +5123,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 505 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.95 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . str s1, [x27], #254 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeER. str b1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eE-R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeER str h1, [x27, #254]! -# CHECK-NEXT: [0,9] . D===eE-R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . str d1, [x27], #254 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . str q1, [x27], #254 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeER. str b1, [x27, #254]! +# CHECK-NEXT: [0,7] . D=eE-R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeER str h1, [x27, #254]! +# CHECK-NEXT: [0,9] . D=eE-R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5141,15 +5151,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27, #254]! -# CHECK-NEXT: 9. 
1 4.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 str d1, [x27], #254 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 str q1, [x27], #254 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 str b1, [x27, #254]! +# CHECK-NEXT: 7. 1 2.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 str h1, [x27, #254]! +# CHECK-NEXT: 9. 1 2.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.2 # CHECK: [87] Code Region - G88 @@ -5158,24 +5168,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2300 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 4.56 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 3.3 +# CHECK-NEXT: Block RThroughput: 4.6 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeeeER . str s1, [x27, #254]! # CHECK-NEXT: [0,1] D=eE-R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeER . str d1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eE-R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27, #254]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eER . str w1, [x27], #254 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. str x1, [x27], #254 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeER . str d1, [x27, #254]! +# CHECK-NEXT: [0,3] .D=eE-R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . str q1, [x27, #254]! +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeER . str w1, [x27], #254 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeER. str x1, [x27], #254 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5186,15 +5196,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]! # CHECK-NEXT: 1. 
1 2.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 1.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27, #254]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str w1, [x27], #254 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str x1, [x27], #254 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 str d1, [x27, #254]! +# CHECK-NEXT: 3. 1 2.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 str q1, [x27, #254]! +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 str w1, [x27], #254 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 str x1, [x27], #254 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.2 # CHECK: [88] Code Region - G89 @@ -5203,24 +5213,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeER . . str w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]! -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeER. . str x1, [x27, #254]! +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . strb w1, [x27], #254 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeER . 
strb w1, [x27, #254]! +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeER. strh w1, [x27], #254 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5231,15 +5241,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 strb w1, [x27], #254 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]! -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 str x1, [x27, #254]! +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 strb w1, [x27], #254 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 strb w1, [x27, #254]! +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 strh w1, [x27], #254 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [89] Code Region - G90 @@ -5248,24 +5258,24 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 10 +# CHECK: Dispatch Width: 5 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: # CHECK-NEXT: Index 012345678 # CHECK: [0,0] DeER . . strh w1, [x27, #254]! # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eER. . stz2g x26, [x27], #4064 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eER . stz2g x26, [x27, #4064]! -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eER . 
stzg x26, [x27], #4064 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eER. stzg x26, [x27, #4064]! -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeER. . stz2g x26, [x27], #4064 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeER . stz2g x26, [x27, #4064]! +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeER . stzg x26, [x27], #4064 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeER. stzg x26, [x27, #4064]! +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5276,27 +5286,27 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]! # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stz2g x26, [x27], #4064 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stz2g x26, [x27, #4064]! -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stzg x26, [x27], #4064 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stzg x26, [x27, #4064]! -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 stz2g x26, [x27], #4064 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 stz2g x26, [x27, #4064]! +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 stzg x26, [x27], #4064 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 stzg x26, [x27, #4064]! +# CHECK-NEXT: 9. 
1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [90] Code Region - G91 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 110 +# CHECK-NEXT: Total Cycles: 143 # CHECK-NEXT: Total uOps: 600 -# CHECK: Dispatch Width: 10 -# CHECK-NEXT: uOps Per Cycle: 5.45 -# CHECK-NEXT: IPC: 3.64 -# CHECK-NEXT: Block RThroughput: 1.0 +# CHECK: Dispatch Width: 5 +# CHECK-NEXT: uOps Per Cycle: 4.20 +# CHECK-NEXT: IPC: 2.80 +# CHECK-NEXT: Block RThroughput: 1.2 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -5305,7 +5315,7 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . ldr x1, [x27], #254 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D====eeeeER ldr x2, [x1], #254 -# CHECK-NEXT: [0,3] D=eE------R add x0, x27, #1 +# CHECK-NEXT: [0,3] .DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5317,5 +5327,5 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.3 2.0 +# CHECK-NEXT: 3. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.3 2.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s index e1c7bf56f45f2..72ae67e3bea71 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s @@ -2688,7 +2688,7 @@ drps # CHECK-NEXT: 1 1 0.25 movk x7, #0, lsl #32 # CHECK-NEXT: 1 1 0.25 movz x8, #0, lsl #48 # CHECK-NEXT: 1 1 0.25 movk x9, #0, lsl #48 -# CHECK-NEXT: 1 1 0.07 U msr DAIFSet, #0 +# CHECK-NEXT: 1 1 0.12 U msr DAIFSet, #0 # CHECK-NEXT: 1 1 0.25 adr x2, #1600 # CHECK-NEXT: 1 1 0.25 adrp x21, #6553600 # CHECK-NEXT: 1 1 0.25 adr x0, #262144 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-clear-upper-regs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-clear-upper-regs.s index 37ae765148396..e0eb35917dc5e 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-clear-upper-regs.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-clear-upper-regs.s @@ -58,7 +58,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 41 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.88 # CHECK-NEXT: IPC: 4.88 # CHECK-NEXT: Block RThroughput: 0.3 @@ -134,7 +134,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -211,7 +211,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -288,7 +288,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# 
CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -365,7 +365,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -442,7 +442,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -519,7 +519,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -596,7 +596,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 44 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.55 # CHECK-NEXT: IPC: 4.55 # CHECK-NEXT: Block RThroughput: 0.3 @@ -673,7 +673,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 403 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 0.5 @@ -750,7 +750,7 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: Total Cycles: 1003 # CHECK-NEXT: Total uOps: 300 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.30 # CHECK-NEXT: IPC: 0.20 # CHECK-NEXT: Block RThroughput: 0.5 @@ -805,9 +805,9 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: [1,0] D==========eeeeeeeeER . . . . . ld1 { v0.b }[0], [sp] # CHECK-NEXT: [1,1] D==================eeER . . . . . add v0.16b, v0.16b, v0.16b # CHECK-NEXT: [2,0] D====================eeeeeeeeER . . . 
ld1 { v0.b }[0], [sp] -# CHECK-NEXT: [2,1] D============================eeER . . . add v0.16b, v0.16b, v0.16b -# CHECK-NEXT: [3,0] D==============================eeeeeeeeER . ld1 { v0.b }[0], [sp] -# CHECK-NEXT: [3,1] D======================================eeER add v0.16b, v0.16b, v0.16b +# CHECK-NEXT: [2,1] .D===========================eeER . . . add v0.16b, v0.16b, v0.16b +# CHECK-NEXT: [3,0] .D=============================eeeeeeeeER . ld1 { v0.b }[0], [sp] +# CHECK-NEXT: [3,1] .D=====================================eeER add v0.16b, v0.16b, v0.16b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -816,6 +816,6 @@ add v0.16b, v0.16b, v0.16b # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 4 16.0 0.3 0.0 ld1 { v0.b }[0], [sp] -# CHECK-NEXT: 1. 4 24.0 0.0 0.0 add v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 4 20.0 0.1 0.0 +# CHECK-NEXT: 0. 4 15.8 0.3 0.0 ld1 { v0.b }[0], [sp] +# CHECK-NEXT: 1. 4 23.5 0.0 0.0 add v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 4 19.6 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s index a720a6bde305b..c3ccf1ceba307 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-forwarding.s @@ -238,7 +238,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 3.0 @@ -276,7 +276,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.57 # CHECK-NEXT: IPC: 0.57 # CHECK-NEXT: Block RThroughput: 3.0 @@ -314,7 +314,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 
600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.35 # CHECK-NEXT: IPC: 0.35 # CHECK-NEXT: Block RThroughput: 1.5 @@ -331,10 +331,10 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmadd d0, d0, d1, d2 # CHECK-NEXT: [1,0] D=================eeER . . .. fadd d0, d0, d0 # CHECK-NEXT: [1,1] D===================eeeeER . .. fmadd d0, d1, d2, d0 -# CHECK-NEXT: [1,2] D=======================eeeER . .. fmul d0, d0, d0 -# CHECK-NEXT: [1,3] D========================eeeeER .. fmadd d0, d1, d2, d0 -# CHECK-NEXT: [1,4] D==========================eeeeER .. fmadd d0, d1, d2, d0 -# CHECK-NEXT: [1,5] D==============================eeeeER fmadd d0, d0, d1, d2 +# CHECK-NEXT: [1,2] .D======================eeeER . .. fmul d0, d0, d0 +# CHECK-NEXT: [1,3] .D=======================eeeeER .. fmadd d0, d1, d2, d0 +# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmadd d0, d1, d2, d0 +# CHECK-NEXT: [1,5] .D=============================eeeeER fmadd d0, d0, d1, d2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -345,11 +345,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.5 0.5 0.0 fadd d0, d0, d0 # CHECK-NEXT: 1. 2 11.5 0.0 0.0 fmadd d0, d1, d2, d0 -# CHECK-NEXT: 2. 2 15.5 0.0 0.0 fmul d0, d0, d0 -# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmadd d0, d1, d2, d0 -# CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmadd d0, d1, d2, d0 -# CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmadd d0, d0, d1, d2 -# CHECK-NEXT: 2 15.7 0.1 0.0 +# CHECK-NEXT: 2. 2 15.0 0.0 0.0 fmul d0, d0, d0 +# CHECK-NEXT: 3. 2 16.0 0.0 0.0 fmadd d0, d1, d2, d0 +# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmadd d0, d1, d2, d0 +# CHECK-NEXT: 5. 
2 22.0 0.0 0.0 fmadd d0, d0, d1, d2 +# CHECK-NEXT: 2 15.3 0.1 0.0 # CHECK: [3] Code Region - saba @@ -358,7 +358,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 @@ -396,7 +396,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 @@ -434,7 +434,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1103 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 0.8 @@ -472,7 +472,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1103 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 0.8 @@ -510,7 +510,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 @@ -548,7 +548,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 @@ -586,7 +586,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.5 @@ -624,7 +624,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 
1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 @@ -662,7 +662,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.35 # CHECK-NEXT: IPC: 0.35 # CHECK-NEXT: Block RThroughput: 1.5 @@ -679,10 +679,10 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmla v0.2d, v0.2d, v1.2d # CHECK-NEXT: [1,0] D=================eeeER . . .. fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D==================eeeeER. . .. fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: [1,2] D======================eeER . .. fadd v0.2d, v0.2d, v0.2d -# CHECK-NEXT: [1,3] D========================eeeeER .. fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: [1,4] D==========================eeeeER .. fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: [1,5] D==============================eeeeER fmla v0.2d, v0.2d, v1.2d +# CHECK-NEXT: [1,2] .D=====================eeER . .. fadd v0.2d, v0.2d, v0.2d +# CHECK-NEXT: [1,3] .D=======================eeeeER .. fmla v0.2d, v1.2d, v2.2d +# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmla v0.2d, v1.2d, v2.2d +# CHECK-NEXT: [1,5] .D=============================eeeeER fmla v0.2d, v0.2d, v1.2d # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -693,11 +693,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 9.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: 2. 2 14.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d -# CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmla v0.2d, v0.2d, v1.2d -# CHECK-NEXT: 2 15.3 0.1 0.0 +# CHECK-NEXT: 2. 2 14.0 0.0 0.0 fadd v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 3. 
2 16.0 0.0 0.0 fmla v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmla v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 5. 2 22.0 0.0 0.0 fmla v0.2d, v0.2d, v1.2d +# CHECK-NEXT: 2 15.0 0.1 0.0 # CHECK: [12] Code Region - fmlal @@ -706,7 +706,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 2203 # CHECK-NEXT: Total uOps: 600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 1.5 @@ -723,10 +723,10 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,5] D=================eeeeeER. . . . .. fmlal v0.4s, v0.4h, v1.4h # CHECK-NEXT: [1,0] D======================eeeER . . . .. fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: [1,1] D=========================eeeeeER . . .. fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: [1,2] D==============================eeER. . .. fadd v0.2d, v0.2d, v0.2d -# CHECK-NEXT: [1,3] D================================eeeeeER. .. fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: [1,4] D==================================eeeeeER .. fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: [1,5] D=======================================eeeeeER fmlal v0.4s, v0.4h, v1.4h +# CHECK-NEXT: [1,2] .D=============================eeER. . .. fadd v0.2d, v0.2d, v0.2d +# CHECK-NEXT: [1,3] .D===============================eeeeeER. .. fmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: [1,4] .D=================================eeeeeER .. fmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: [1,5] .D======================================eeeeeER fmlal v0.4s, v0.4h, v1.4h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -737,11 +737,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 12.0 0.5 0.0 fmul v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1. 2 15.0 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: 2. 2 20.0 0.0 0.0 fadd v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 3. 2 22.0 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: 4. 2 24.0 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h -# CHECK-NEXT: 5. 
2 29.0 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h -# CHECK-NEXT: 2 20.3 0.1 0.0 +# CHECK-NEXT: 2. 2 19.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 3. 2 21.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 4. 2 23.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 5. 2 28.5 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h +# CHECK-NEXT: 2 20.0 0.1 0.0 # CHECK: [13] Code Region - bfdot @@ -750,7 +750,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 @@ -788,7 +788,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1603 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 1.0 @@ -826,7 +826,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 1.0 @@ -864,7 +864,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 1100 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.78 # CHECK-NEXT: IPC: 0.78 # CHECK-NEXT: Block RThroughput: 10.0 @@ -881,20 +881,20 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,5] D=======eeER . . . . crc32h w0, w0, w21 # CHECK-NEXT: [0,6] D========eeER . . . . crc32w w0, w0, w24 # CHECK-NEXT: [0,7] D=========eeER . . . . crc32x w0, w0, x25 -# CHECK-NEXT: [0,8] D==========eeER. . . . crc32ch w0, w0, w16 -# CHECK-NEXT: [0,9] D===========eeER . . . crc32cw w0, w0, w23 -# CHECK-NEXT: [0,10] D============eeER . . . crc32cx w0, w0, x5 -# CHECK-NEXT: [1,0] D==============eeER . . . mul w0, w0, w0 -# CHECK-NEXT: [1,1] D================eeER . . 
crc32cb w0, w0, w1 -# CHECK-NEXT: [1,2] D=================eeER . . crc32cb w0, w0, w1 -# CHECK-NEXT: [1,3] D===================eeER . . crc32cb w0, w0, w0 +# CHECK-NEXT: [0,8] .D=========eeER. . . . crc32ch w0, w0, w16 +# CHECK-NEXT: [0,9] .D==========eeER . . . crc32cw w0, w0, w23 +# CHECK-NEXT: [0,10] .D===========eeER . . . crc32cx w0, w0, x5 +# CHECK-NEXT: [1,0] .D=============eeER . . . mul w0, w0, w0 +# CHECK-NEXT: [1,1] .D===============eeER . . crc32cb w0, w0, w1 +# CHECK-NEXT: [1,2] .D================eeER . . crc32cb w0, w0, w1 +# CHECK-NEXT: [1,3] .D==================eeER . . crc32cb w0, w0, w0 # CHECK-NEXT: [1,4] .D===================eeER. . crc32b w0, w0, w15 -# CHECK-NEXT: [1,5] .D====================eeER . crc32h w0, w0, w21 -# CHECK-NEXT: [1,6] .D=====================eeER . crc32w w0, w0, w24 -# CHECK-NEXT: [1,7] .D======================eeER . crc32x w0, w0, x25 -# CHECK-NEXT: [1,8] .D=======================eeER . crc32ch w0, w0, w16 -# CHECK-NEXT: [1,9] .D========================eeER. crc32cw w0, w0, w23 -# CHECK-NEXT: [1,10] .D=========================eeER crc32cx w0, w0, x5 +# CHECK-NEXT: [1,5] . D===================eeER . crc32h w0, w0, w21 +# CHECK-NEXT: [1,6] . D====================eeER . crc32w w0, w0, w24 +# CHECK-NEXT: [1,7] . D=====================eeER . crc32x w0, w0, x25 +# CHECK-NEXT: [1,8] . D======================eeER . crc32ch w0, w0, w16 +# CHECK-NEXT: [1,9] . D=======================eeER. crc32cw w0, w0, w23 +# CHECK-NEXT: [1,10] . D========================eeER crc32cx w0, w0, x5 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -903,18 +903,18 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul w0, w0, w0 -# CHECK-NEXT: 1. 2 10.0 0.0 0.0 crc32cb w0, w0, w1 -# CHECK-NEXT: 2. 2 11.0 0.0 0.0 crc32cb w0, w0, w1 -# CHECK-NEXT: 3. 2 13.0 0.0 0.0 crc32cb w0, w0, w0 +# CHECK-NEXT: 0. 
2 7.5 0.5 0.0 mul w0, w0, w0 +# CHECK-NEXT: 1. 2 9.5 0.0 0.0 crc32cb w0, w0, w1 +# CHECK-NEXT: 2. 2 10.5 0.0 0.0 crc32cb w0, w0, w1 +# CHECK-NEXT: 3. 2 12.5 0.0 0.0 crc32cb w0, w0, w0 # CHECK-NEXT: 4. 2 13.5 0.0 0.0 crc32b w0, w0, w15 -# CHECK-NEXT: 5. 2 14.5 0.0 0.0 crc32h w0, w0, w21 -# CHECK-NEXT: 6. 2 15.5 0.0 0.0 crc32w w0, w0, w24 -# CHECK-NEXT: 7. 2 16.5 0.0 0.0 crc32x w0, w0, x25 -# CHECK-NEXT: 8. 2 17.5 0.0 0.0 crc32ch w0, w0, w16 -# CHECK-NEXT: 9. 2 18.5 0.0 0.0 crc32cw w0, w0, w23 -# CHECK-NEXT: 10. 2 19.5 0.0 0.0 crc32cx w0, w0, x5 -# CHECK-NEXT: 2 14.3 0.0 0.0 +# CHECK-NEXT: 5. 2 14.0 0.0 0.0 crc32h w0, w0, w21 +# CHECK-NEXT: 6. 2 15.0 0.0 0.0 crc32w w0, w0, w24 +# CHECK-NEXT: 7. 2 16.0 0.0 0.0 crc32x w0, w0, x25 +# CHECK-NEXT: 8. 2 16.5 0.0 0.0 crc32ch w0, w0, w16 +# CHECK-NEXT: 9. 2 17.5 0.0 0.0 crc32cw w0, w0, w23 +# CHECK-NEXT: 10. 2 18.5 0.0 0.0 crc32cx w0, w0, x5 +# CHECK-NEXT: 2 13.7 0.0 0.0 # CHECK: [17] Code Region - Z sdot.s @@ -923,7 +923,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 2.0 @@ -938,8 +938,8 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b # CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b -# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b -# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b +# CHECK-NEXT: [1,2] .D=================eeeER .. sdot z0.s, z1.b, z2.b +# CHECK-NEXT: [1,3] .D====================eeeER sdot z0.s, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -950,9 +950,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: 1. 
2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b -# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b -# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b -# CHECK-NEXT: 2 12.0 0.1 0.0 +# CHECK-NEXT: 2. 2 12.5 0.0 0.0 sdot z0.s, z1.b, z2.b +# CHECK-NEXT: 3. 2 15.5 0.0 0.0 sdot z0.s, z0.b, z1.b +# CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [18] Code Region - Z sudot @@ -961,7 +961,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 2.0 @@ -976,8 +976,8 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b[1] # CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1] -# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1] -# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1] +# CHECK-NEXT: [1,2] .D=================eeeER .. sdot z0.s, z1.b, z2.b[1] +# CHECK-NEXT: [1,3] .D====================eeeER sdot z0.s, z0.b, z1.b[1] # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -988,9 +988,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1] -# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1] -# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1] -# CHECK-NEXT: 2 12.0 0.1 0.0 +# CHECK-NEXT: 2. 2 12.5 0.0 0.0 sdot z0.s, z1.b, z2.b[1] +# CHECK-NEXT: 3. 
2 15.5 0.0 0.0 sdot z0.s, z0.b, z1.b[1] +# CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [19] Code Region - Z sdot.d @@ -999,7 +999,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1403 # CHECK-NEXT: Total uOps: 500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.36 # CHECK-NEXT: IPC: 0.29 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1014,8 +1014,8 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,3] D==========eeeeER . . . sdot z0.d, z0.h, z1.h # CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: [1,1] D===================eeeeER . sdot z0.d, z1.h, z2.h -# CHECK-NEXT: [1,2] D====================eeeeER . sdot z0.d, z1.h, z2.h -# CHECK-NEXT: [1,3] D========================eeeeER sdot z0.d, z0.h, z1.h +# CHECK-NEXT: [1,2] .D===================eeeeER . sdot z0.d, z1.h, z2.h +# CHECK-NEXT: [1,3] .D=======================eeeeER sdot z0.d, z0.h, z1.h # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1026,9 +1026,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: 1. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h -# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sdot z0.d, z1.h, z2.h -# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sdot z0.d, z0.h, z1.h -# CHECK-NEXT: 2 13.3 0.1 0.0 +# CHECK-NEXT: 2. 2 13.5 0.0 0.0 sdot z0.d, z1.h, z2.h +# CHECK-NEXT: 3. 2 17.5 0.0 0.0 sdot z0.d, z0.h, z1.h +# CHECK-NEXT: 2 13.0 0.1 0.0 # CHECK: [20] Code Region - Z smmla @@ -1037,7 +1037,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.42 # CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1052,8 +1052,8 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,3] D=========eeeER. . .. smmla z0.s, z0.b, z1.b # CHECK-NEXT: [1,0] D============eeeeeER. .. 
mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: [1,1] D=================eeeER .. smmla z0.s, z1.b, z2.b -# CHECK-NEXT: [1,2] D==================eeeER .. smmla z0.s, z1.b, z2.b -# CHECK-NEXT: [1,3] D=====================eeeER smmla z0.s, z0.b, z1.b +# CHECK-NEXT: [1,2] .D=================eeeER .. smmla z0.s, z1.b, z2.b +# CHECK-NEXT: [1,3] .D====================eeeER smmla z0.s, z0.b, z1.b # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1064,9 +1064,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: 1. 2 12.0 0.0 0.0 smmla z0.s, z1.b, z2.b -# CHECK-NEXT: 2. 2 13.0 0.0 0.0 smmla z0.s, z1.b, z2.b -# CHECK-NEXT: 3. 2 16.0 0.0 0.0 smmla z0.s, z0.b, z1.b -# CHECK-NEXT: 2 12.0 0.1 0.0 +# CHECK-NEXT: 2. 2 12.5 0.0 0.0 smmla z0.s, z1.b, z2.b +# CHECK-NEXT: 3. 2 15.5 0.0 0.0 smmla z0.s, z0.b, z1.b +# CHECK-NEXT: 2 11.8 0.1 0.0 # CHECK: [21] Code Region - Z mla.d @@ -1075,7 +1075,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.47 # CHECK-NEXT: IPC: 0.23 # CHECK-NEXT: Block RThroughput: 8.0 @@ -1088,9 +1088,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,1] D=====eeeeeER . . . . .. mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,2] D=======eeeeeER. . . . .. mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,3] D============eeeeeER. . . .. mla z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: [1,1] D======================eeeeeER. .. mla z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: [1,2] D========================eeeeeER .. mla z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,0] .D================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: [1,1] .D=====================eeeeeER. .. mla z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,2] .D=======================eeeeeER .. 
mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,3] .D============================eeeeeER mla z0.d, p0/m, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): @@ -1100,11 +1100,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 0. 2 9.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1. 2 14.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 2. 2 16.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 3. 2 21.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: 2 15.4 0.1 0.0 +# CHECK-NEXT: 2 15.0 0.1 0.0 # CHECK: [22] Code Region - Z mad.d @@ -1113,7 +1113,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.47 # CHECK-NEXT: IPC: 0.23 # CHECK-NEXT: Block RThroughput: 8.0 @@ -1126,9 +1126,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,1] D=====eeeeeER . . . . .. mad z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,2] D=======eeeeeER. . . . .. mad z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,3] D============eeeeeER. . . .. mad z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: [1,1] D======================eeeeeER. .. mad z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: [1,2] D========================eeeeeER .. mad z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,0] .D================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: [1,1] .D=====================eeeeeER. .. mad z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,2] .D=======================eeeeeER .. 
mad z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,3] .D============================eeeeeER mad z0.d, p0/m, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): @@ -1138,11 +1138,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: 1. 2 14.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: 2. 2 16.5 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 0. 2 9.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1. 2 14.0 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 2. 2 16.0 0.0 0.0 mad z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 3. 2 21.0 0.0 0.0 mad z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: 2 15.4 0.1 0.0 +# CHECK-NEXT: 2 15.0 0.1 0.0 # CHECK: [23] Code Region - Z msb.d @@ -1151,7 +1151,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1703 # CHECK-NEXT: Total uOps: 800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.47 # CHECK-NEXT: IPC: 0.23 # CHECK-NEXT: Block RThroughput: 8.0 @@ -1164,9 +1164,9 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [0,1] D=====eeeeeER . . . . .. msb z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,2] D=======eeeeeER. . . . .. msb z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [0,3] D============eeeeeER. . . .. msb z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: [1,0] D=================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: [1,1] D======================eeeeeER. .. msb z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: [1,2] D========================eeeeeER .. msb z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,0] .D================eeeeeER. . .. mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: [1,1] .D=====================eeeeeER. .. msb z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: [1,2] .D=======================eeeeeER .. 
msb z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: [1,3] .D============================eeeeeER msb z0.d, p0/m, z0.d, z1.d # CHECK: Average Wait times (based on the timeline view): @@ -1176,11 +1176,11 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 9.5 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d -# CHECK-NEXT: 1. 2 14.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d -# CHECK-NEXT: 2. 2 16.5 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 0. 2 9.0 0.5 0.0 mul z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1. 2 14.0 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d +# CHECK-NEXT: 2. 2 16.0 0.0 0.0 msb z0.d, p0/m, z1.d, z2.d # CHECK-NEXT: 3. 2 21.0 0.0 0.0 msb z0.d, p0/m, z0.d, z1.d -# CHECK-NEXT: 2 15.4 0.1 0.0 +# CHECK-NEXT: 2 15.0 0.1 0.0 # CHECK: [24] Code Region - Z fcmla ZPmZZ @@ -1189,7 +1189,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1227,7 +1227,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1265,7 +1265,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1303,7 +1303,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1341,7 +1341,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1303 # CHECK-NEXT: Total uOps: 400 -# 
CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.31 # CHECK-NEXT: IPC: 0.31 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1379,7 +1379,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1603 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.25 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 2.0 @@ -1417,7 +1417,7 @@ bfmlalb z0.s, z0.h, z1.h # CHECK-NEXT: Total Cycles: 1503 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.27 # CHECK-NEXT: IPC: 0.27 # CHECK-NEXT: Block RThroughput: 2.0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s index 4f48de8b42926..2af85a87c51af 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s @@ -30,21 +30,21 @@ sysl x16, #5, c11, c8, #5 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 0.07 U at s12e1r, x28 -# CHECK-NEXT: 1 1 0.07 U brk #0x8415 -# CHECK-NEXT: 1 1 0.07 * * U clrex -# CHECK-NEXT: 1 1 0.07 * * U csdb -# CHECK-NEXT: 1 1 0.07 U dcps1 -# CHECK-NEXT: 1 1 0.07 U dcps2 -# CHECK-NEXT: 1 1 0.07 U dcps3 -# CHECK-NEXT: 1 1 0.07 * * U dmb sy -# CHECK-NEXT: 1 1 0.07 U hlt #0x7a67 -# CHECK-NEXT: 1 1 0.07 U hvc #0xecb9 -# CHECK-NEXT: 1 1 0.07 * * U isb -# CHECK-NEXT: 1 1 0.07 * * U pssbb -# CHECK-NEXT: 1 1 0.07 U smc #0x7e57 -# CHECK-NEXT: 1 1 0.07 U svc #0x89cb -# CHECK-NEXT: 1 1 0.07 U sysl x16, #5, c11, c8, #5 +# CHECK-NEXT: 1 1 0.12 U at s12e1r, x28 +# CHECK-NEXT: 1 1 0.12 U brk #0x8415 +# CHECK-NEXT: 1 1 0.12 * * U clrex +# CHECK-NEXT: 1 1 0.12 * * U csdb +# CHECK-NEXT: 1 1 0.12 U dcps1 +# CHECK-NEXT: 1 1 0.12 U dcps2 +# CHECK-NEXT: 1 1 0.12 U dcps3 +# CHECK-NEXT: 1 1 0.12 * * U dmb sy +# CHECK-NEXT: 1 1 0.12 
U hlt #0x7a67 +# CHECK-NEXT: 1 1 0.12 U hvc #0xecb9 +# CHECK-NEXT: 1 1 0.12 * * U isb +# CHECK-NEXT: 1 1 0.12 * * U pssbb +# CHECK-NEXT: 1 1 0.12 U smc #0x7e57 +# CHECK-NEXT: 1 1 0.12 U svc #0x89cb +# CHECK-NEXT: 1 1 0.12 U sysl x16, #5, c11, c8, #5 # CHECK: Resources: # CHECK-NEXT: [0.0] - V1UnitB diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s index d855ba06ec992..3c0f0b3ddcb15 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s @@ -3991,19 +3991,19 @@ zip2 z31.s, z31.s, z31.s # CHECK-NEXT: 2 2 2.00 2 V1UnitI[2],V1UnitM[2],V1UnitM0[2] ANDS_PPzPP movs p0.b, p0/z, p0.b # CHECK-NEXT: 2 2 2.00 2 V1UnitI[2],V1UnitM[2],V1UnitM0[2] ORRS_PPzPP movs p15.b, p15.b # CHECK-NEXT: 2 2 2.00 2 V1UnitI[2],V1UnitM[2],V1UnitM0[2] ANDS_PPzPP movs p15.b, p15/z, p15.b -# CHECK-NEXT: 1 1 0.07 U 1 MRS mrs x3, ID_AA64ZFR0_EL1 -# CHECK-NEXT: 1 1 0.07 U 1 MRS mrs x3, ZCR_EL1 -# CHECK-NEXT: 1 1 0.07 U 1 MRS mrs x3, ZCR_EL12 -# CHECK-NEXT: 1 1 0.07 U 1 MRS mrs x3, ZCR_EL2 -# CHECK-NEXT: 1 1 0.07 U 1 MRS mrs x3, ZCR_EL3 -# CHECK-NEXT: 1 1 0.07 U 1 MSR msr ZCR_EL1, x3 +# CHECK-NEXT: 1 1 0.12 U 1 MRS mrs x3, ID_AA64ZFR0_EL1 +# CHECK-NEXT: 1 1 0.12 U 1 MRS mrs x3, ZCR_EL1 +# CHECK-NEXT: 1 1 0.12 U 1 MRS mrs x3, ZCR_EL12 +# CHECK-NEXT: 1 1 0.12 U 1 MRS mrs x3, ZCR_EL2 +# CHECK-NEXT: 1 1 0.12 U 1 MRS mrs x3, ZCR_EL3 +# CHECK-NEXT: 1 1 0.12 U 1 MSR msr ZCR_EL1, x3 # CHECK-NEXT: 2 5 2.00 2 V1UnitV[2],V1UnitV0[2],V1UnitV01[2],V1UnitV02[2] MSB_ZPmZZ_D msb z0.d, p0/m, z0.d, z0.d # CHECK-NEXT: 1 4 1.00 4 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 MSB_ZPmZZ_B msb z18.b, p1/m, z27.b, z0.b # CHECK-NEXT: 1 4 1.00 4 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 MSB_ZPmZZ_H msb z27.h, p5/m, z23.h, z1.h # CHECK-NEXT: 1 4 1.00 4 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 MSB_ZPmZZ_S msb z26.s, p2/m, z0.s, z2.s -# CHECK-NEXT: 1 1 0.07 U 1 MSR msr 
ZCR_EL12, x3 -# CHECK-NEXT: 1 1 0.07 U 1 MSR msr ZCR_EL2, x3 -# CHECK-NEXT: 1 1 0.07 U 1 MSR msr ZCR_EL3, x3 +# CHECK-NEXT: 1 1 0.12 U 1 MSR msr ZCR_EL12, x3 +# CHECK-NEXT: 1 1 0.12 U 1 MSR msr ZCR_EL2, x3 +# CHECK-NEXT: 1 1 0.12 U 1 MSR msr ZCR_EL3, x3 # CHECK-NEXT: 1 4 1.00 4 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 MUL_ZPmZ_B mul z0.b, p7/m, z0.b, z31.b # CHECK-NEXT: 2 5 2.00 5 V1UnitV[2],V1UnitV0[2],V1UnitV01[2],V1UnitV02[2] MUL_ZPmZ_D mul z0.d, p7/m, z0.d, z31.d # CHECK-NEXT: 1 4 1.00 4 V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02 MUL_ZPmZ_H mul z0.h, p7/m, z0.h, z31.h diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s index 264ad8bccc58e..1961b24ae6aac 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s @@ -1165,7 +1165,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1179,11 +1179,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,7] .D===eE----R. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1197,12 +1197,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 2.0 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.0 # CHECK: [1] Code Region - G02 @@ -1211,7 +1211,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1225,11 +1225,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE----R. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1243,12 +1243,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 2.0 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.0 # CHECK: [2] Code Region - G03 @@ -1257,7 +1257,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1271,11 +1271,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeeeER ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,7] .D===eE----R. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1289,12 +1289,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 2.0 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.0 # CHECK: [3] Code Region - G04 @@ -1303,7 +1303,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1900 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.74 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.0 @@ -1316,12 +1316,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. 
ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1334,13 +1334,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.1 2.0 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.0 # CHECK: [4] Code Region - G05 @@ -1349,7 +1349,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.3 @@ -1362,12 +1362,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,4] .D=eeeeeeER . 
ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1380,13 +1380,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 2.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.0 # CHECK: [5] Code Region - G06 @@ -1395,7 +1395,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.94 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.3 @@ -1408,12 +1408,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . 
ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1426,13 +1426,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 2.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.0 # CHECK: [6] Code Region - G07 @@ -1441,7 +1441,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2300 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.53 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 4.3 @@ -1454,12 +1454,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R . . 
add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1472,13 +1472,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 9. 
1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.0 # CHECK: [7] Code Region - G08 @@ -1487,7 +1487,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1498,14 +1498,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1516,15 +1516,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 5. 
1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [8] Code Region - G09 @@ -1533,7 +1533,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.92 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1544,14 +1544,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1562,15 +1562,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [9] Code Region - G10 @@ -1579,7 +1579,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 608 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.11 # CHECK-NEXT: IPC: 1.64 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1590,14 +1590,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1608,15 +1608,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 
1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.8 0.1 2.1 # CHECK: [10] Code Region - G11 @@ -1625,7 +1625,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 509 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.72 # CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 4.7 @@ -1638,12 +1638,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,5] D===eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,9] .D====eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,5] .D==eE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,7] . D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,9] . D==eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1656,13 +1656,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 # CHECK-NEXT: 3. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 5. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 2.2 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 9. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 2.2 # CHECK: [11] Code Region - G12 @@ -1671,7 +1671,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.72 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 4.7 @@ -1682,14 +1682,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 # CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 +# CHECK-NEXT: [0,4] . 
DeeeeeeeER. ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1700,15 +1700,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 # CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 2.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 9. 
1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.1 2.2 # CHECK: [12] Code Region - G13 @@ -1717,7 +1717,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1110 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.34 # CHECK-NEXT: IPC: 0.90 # CHECK-NEXT: Block RThroughput: 5.0 @@ -1728,14 +1728,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeER. . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE-----R. . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE----R. . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE-----R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,9] .D==========eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeER. . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE----R. . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE-----R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D======eeeeeeeeER ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,9] . D=======eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1746,15 +1746,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 3. 
1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 9. 1 11.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 4.2 0.1 2.5 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 7.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 9. 1 8.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.5 # CHECK: [13] Code Region - G14 @@ -1763,7 +1763,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1776,12 +1776,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . 
ld1 { v1.h }[0], [x27], #2 # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1794,13 +1794,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 17.2 0.1 3.0 +# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 16.7 0.1 3.0 # CHECK: [14] Code Region - G15 @@ -1809,7 +1809,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.50 # CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1822,12 +1822,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 # CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 # CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . 
ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 # CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1840,13 +1840,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 # CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 25.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 # CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 17.2 0.1 3.0 +# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 9. 
1 32.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 16.7 0.1 3.0 # CHECK: [15] Code Region - G16 @@ -1855,7 +1855,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 1.66 # CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1868,12 +1868,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8 # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld1r { v1.2s }, [x27], #4 # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1886,13 +1886,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 3.0 +# CHECK-NEXT: 8. 
1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [16] Code Region - G17 @@ -1901,7 +1901,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.92 # CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1914,12 +1914,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1 # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld1r { v1.16b }, [x27], #1 # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1932,13 +1932,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 # CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: 9. 
1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 3.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [17] Code Region - G18 @@ -1947,7 +1947,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.92 # CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 2.5 @@ -1960,12 +1960,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28 # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeeeER. ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld1r { v1.4s }, [x27], x28 # CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1978,13 +1978,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 # CHECK-NEXT: 7. 
1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 3.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 3.0 # CHECK: [18] Code Region - G19 @@ -1993,10 +1993,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.71 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2006,12 +2006,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28 # CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,7] . D==eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2024,13 +2024,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 5. 
1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 3.0 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 7. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 3.0 # CHECK: [19] Code Region - G20 @@ -2039,10 +2039,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2900 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.69 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: Block RThroughput: 3.6 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2050,14 +2050,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. 
ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2068,15 +2068,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [20] Code Region - G21 @@ -2085,10 +2085,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2700 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.29 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.4 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2096,14 +2096,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . 
ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2114,15 +2114,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 7. 
1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [21] Code Region - G22 @@ -2131,10 +2131,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 3310 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.79 # CHECK-NEXT: IPC: 0.30 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2142,14 +2142,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
D============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2160,15 +2160,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 17.0 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [22] Code Region - G23 @@ -2177,10 +2177,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.62 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2188,14 +2188,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] D=================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,9] .D================================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2206,15 +2206,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 9. 1 33.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 17.1 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [23] Code Region - G24 @@ -2223,10 +2223,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 2603 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.96 # CHECK-NEXT: IPC: 0.38 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.1 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 @@ -2234,14 +2234,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,3] D=========eE------R . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] D=================eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,7] .D=================eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: [0,9] .D==================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,3] .D========eE------R . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . 
ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D==============eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,7] . D===============eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: [0,9] . D===============eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2252,15 +2252,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 7. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 19.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 12.9 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 15.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 7. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 9. 
1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 11.3 0.1 3.0 # CHECK: [24] Code Region - G25 @@ -2269,10 +2269,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.90 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.1 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2280,14 +2280,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2298,15 +2298,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [25] Code Region - G26 @@ -2315,10 +2315,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.90 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.5 +# CHECK-NEXT: Block RThroughput: 3.1 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2326,14 +2326,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . 
DeeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2344,15 +2344,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 9. 
1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [26] Code Region - G27 @@ -2361,10 +2361,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.49 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 2.8 +# CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2372,14 +2372,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,5] D===eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,9] .D====eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2390,15 +2390,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 9. 1 5.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [27] Code Region - G28 @@ -2407,10 +2407,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3700 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 7.25 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK-NEXT: Block RThroughput: 4.6 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2418,14 +2418,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . 
ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2436,15 +2436,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 9. 
1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [28] Code Region - G29 @@ -2453,10 +2453,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 7.45 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 4.3 +# CHECK-NEXT: Block RThroughput: 4.8 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2464,14 +2464,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 # CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2482,15 +2482,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 2. 
1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 # CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.6 0.1 3.0 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [29] Code Region - G30 @@ -2499,10 +2499,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1910 # CHECK-NEXT: Total uOps: 3700 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 1.94 # CHECK-NEXT: IPC: 0.52 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK-NEXT: Block RThroughput: 4.6 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 @@ -2510,14 +2510,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,7] .D==========eE------R . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,9] . 
D=================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=======eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,7] . D========eE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==============eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,9] . D===============eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2528,15 +2528,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 7. 1 11.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 9. 1 18.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 6.9 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 15.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 9. 
1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 5.7 0.1 3.0 # CHECK: [30] Code Region - G31 @@ -2545,10 +2545,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.87 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2556,14 +2556,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . 
D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2574,15 +2574,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [31] Code Region - G32 @@ -2591,10 +2591,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 0.87 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 @@ -2602,14 +2602,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D========eeeeeeeeER . . . . . . 
ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,3] D=========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D================eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,7] .D========================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D===============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2620,15 +2620,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 3. 1 10.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 17.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 7. 1 25.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 32.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 16.7 0.1 3.0 +# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 15.5 0.1 3.0 # CHECK: [32] Code Region - G33 @@ -2637,10 +2637,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 6.86 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2648,14 +2648,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. 
ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2666,15 +2666,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [33] Code Region - G34 @@ -2683,10 +2683,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 6.86 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2694,14 +2694,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . 
ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2712,15 +2712,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 
1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [34] Code Region - G35 @@ -2729,10 +2729,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 6.86 # CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 3.8 +# CHECK-NEXT: Block RThroughput: 4.4 # CHECK: Timeline view: # CHECK-NEXT: 01234 @@ -2740,14 +2740,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eE------R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . 
DeeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2758,42 +2758,42 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 3.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 3.0 # CHECK: [35] Code Region - G36 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 611 +# CHECK-NEXT: Total Cycles: 910 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 7.36 -# CHECK-NEXT: IPC: 1.64 -# CHECK-NEXT: Block RThroughput: 5.3 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 4.95 +# CHECK-NEXT: IPC: 1.10 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER .. 
ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,7] . D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] . DeE-------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,5] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,7] . .DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,9] . . DeE-------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2805,41 +2805,41 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 2.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.2 3.2 +# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 9. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.1 0.4 3.2 # CHECK: [36] Code Region - G37 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 610 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 7.87 -# CHECK-NEXT: IPC: 1.64 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 4.76 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE-------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,5] . D=eE-------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER . . 
ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . DeE-------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,5] . DeE-------R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . . DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2849,43 +2849,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 5. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.2 3.3 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 5. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.3 # CHECK: [37] Code Region - G38 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 660 +# CHECK-NEXT: Total Cycles: 1010 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 7.27 -# CHECK-NEXT: IPC: 1.52 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 4.75 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE-------R.. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,9] . D==eE-------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE-------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeE-------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2895,16 +2895,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 7.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.7 0.2 3.3 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 1.0 0.0 7.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.3 # CHECK: [38] Code Region - G39 @@ -2913,25 +2913,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 1.12 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . 
. add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,7] . D======================eE------R. . . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2941,16 +2941,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 5. 
1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [39] Code Region - G40 @@ -2959,25 +2959,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 4003 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 1.12 # CHECK-NEXT: IPC: 0.25 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 012 # CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,1] D=eE------R . . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D========eE------R . . . . . . add x0, x27, #1 -# CHECK-NEXT: [0,4] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,5] . D===============eE------R . . . . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,7] . D======================eE------R. . . 
add x0, x27, #1 -# CHECK-NEXT: [0,8] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=============================eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D======eE------R . . . . . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,5] . D============eE------R . . . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D==================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,7] . . D==================eE------R. . . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D========================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,9] . . D========================eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2987,16 +2987,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 5. 1 16.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 7. 1 23.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 9. 1 30.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 15.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 
1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 5. 1 13.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: 7. 1 19.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 9. 1 25.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 13.0 0.1 3.0 # CHECK: [40] Code Region - G41 @@ -3005,25 +3005,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.36 # CHECK-NEXT: IPC: 0.53 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 01 # CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] D=eE------R . .. add x0, x27, #1 -# CHECK-NEXT: [0,2] .D=======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D========eE------R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=======eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,5] . D========eE------R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,7] . D========eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=======eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: [0,9] . D========eE------R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE------R . .. add x0, x27, #1 +# CHECK-NEXT: [0,2] . D======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D======eE------R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=====eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,5] . D=====eE------R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . .D====eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,7] . 
. D====eE------R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . . D===eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: [0,9] . . D===eE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3033,43 +3033,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 5. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: 7. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 8.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: 9. 1 9.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 7.1 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 7.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 5. 1 6.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 5.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 7. 1 5.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 9. 
1 4.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 4.6 0.1 3.0 # CHECK: [41] Code Region - G42 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 8.82 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 4.46 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: [0,9] . . 
DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3078,44 +3078,44 @@ add x0, x27, 1 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 3.0 +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [42] Code Region - G43 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total Cycles: 1009 # CHECK-NEXT: Total uOps: 4500 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 8.82 -# CHECK-NEXT: IPC: 1.96 -# CHECK-NEXT: Block RThroughput: 5.0 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 4.46 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 5.6 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . DeeeeeeeeER. ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,7] . D=eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,9] . D=eE------R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE------R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R. . add x0, x27, #1 +# CHECK-NEXT: [0,6] . .DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . . DeeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . . 
DeE------R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3125,43 +3125,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 1 1.5 0.1 3.0 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 9. 
1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.0 0.5 3.0 # CHECK: [43] Code Region - G44 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total Cycles: 609 # CHECK-NEXT: Total uOps: 3300 -# CHECK: Dispatch Width: 15 -# CHECK-NEXT: uOps Per Cycle: 6.50 -# CHECK-NEXT: IPC: 1.97 -# CHECK-NEXT: Block RThroughput: 3.7 +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 5.42 +# CHECK-NEXT: IPC: 1.64 +# CHECK-NEXT: Block RThroughput: 4.1 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE------R . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE------R. add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,5] . D=eE------R add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeE-R ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,7] . D==eE-----R add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER ldp d1, d2, [x27], #496 -# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE------R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . DeE------R add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeE-R ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,7] . D=eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeeER ldp d1, d2, [x27], #496 +# CHECK-NEXT: [0,9] . 
D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3171,16 +3171,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 2.0 0.0 6.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 -# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldp d1, d2, [x27], #496 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.1 0.1 2.8 +# CHECK-NEXT: 1. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 1.0 0.0 6.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 1.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.4 0.3 2.8 # CHECK: [44] Code Region - G45 @@ -3189,7 +3189,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1700 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.35 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3202,12 +3202,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE----R .. add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeeeER.. ldp s1, s2, [x27, #248]! # CHECK-NEXT: [0,3] D==eE----R.. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeeeER. ldp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER ldp q1, q2, [x27, #992]! 
-# CHECK-NEXT: [0,7] D====eE----R add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeE-R ldp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeeeER. ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] .D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] .D===eE----R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeE-R ldp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3220,13 +3220,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]! # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.1 2.0 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 2.0 # CHECK: [45] Code Region - G46 @@ -3235,7 +3235,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 1900 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.75 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 3.0 @@ -3248,12 +3248,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R .. add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER .. ldp w1, w2, [x27, #248]! # CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER .. ldp x1, x2, [x27, #496]! 
-# CHECK-NEXT: [0,5] D===eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeER. ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: [0,7] D====eE---R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeeER ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: [0,9] .D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER .. ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] .D==eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeER. ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,7] .D===eE---R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeER ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3266,13 +3266,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: 7. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.1 1.2 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.2 # CHECK: [46] Code Region - G47 @@ -3281,7 +3281,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3295,11 +3295,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254 # CHECK-NEXT: [0,3] D==eE----R. . 
add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27], #254 -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. ldr d1, [x27], #254 -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27], #254 -# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ldr q1, [x27], #254 +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3313,12 +3313,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254 # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 2.0 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.0 # CHECK: [47] Code Region - G48 @@ -3327,7 +3327,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.95 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3341,11 +3341,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27, #254]! # CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeeeER . ldr s1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeeeER. 
ldr d1, [x27, #254]! -# CHECK-NEXT: [0,7] D====eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeeeER ldr q1, [x27, #254]! -# CHECK-NEXT: [0,9] D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeeeER ldr q1, [x27, #254]! +# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3359,12 +3359,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! # CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr d1, [x27, #254]! -# CHECK-NEXT: 7. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldr q1, [x27, #254]! -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 2.0 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 2.0 # CHECK: [48] Code Region - G49 @@ -3373,7 +3373,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3387,11 +3387,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeER . ldr x1, [x27], #254 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeER . ldr w1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. ldr x1, [x27, #254]! -# CHECK-NEXT: [0,7] D====eE--R. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeER ldrb w1, [x27], #254 -# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. ldr x1, [x27, #254]! +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER ldrb w1, [x27], #254 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3405,12 +3405,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr x1, [x27], #254 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldr w1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldr x1, [x27, #254]! -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrb w1, [x27], #254 -# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 1.0 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr x1, [x27, #254]! +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrb w1, [x27], #254 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 1.0 # CHECK: [49] Code Region - G50 @@ -3419,7 +3419,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3433,11 +3433,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeER . ldrh w1, [x27], #254 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeER . ldrh w1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. ldrsb w1, [x27], #254 -# CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeER ldrsb x1, [x27], #254 -# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE--R . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. ldrsb w1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER ldrsb x1, [x27], #254 +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3451,12 +3451,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrh w1, [x27], #254 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254 -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254 -# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 1.0 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsb w1, [x27], #254 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsb x1, [x27], #254 +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 1.0 # CHECK: [50] Code Region - G51 @@ -3465,7 +3465,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 1500 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 2.96 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3479,11 +3479,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeER . ldrsb x1, [x27, #254]! # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeER . ldrsh w1, [x27], #254 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. ldrsh x1, [x27], #254 -# CHECK-NEXT: [0,7] D====eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] D====eeeeER ldrsh w1, [x27, #254]! -# CHECK-NEXT: [0,9] D=====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. ldrsh x1, [x27], #254 +# CHECK-NEXT: [0,7] .D===eE--R. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] .D===eeeeER ldrsh w1, [x27, #254]! +# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3497,12 +3497,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]! # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254 -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]! -# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.1 1.0 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldrsh x1, [x27], #254 +# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ldrsh w1, [x27, #254]! +# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 3.0 0.1 1.0 # CHECK: [51] Code Region - G52 @@ -3511,7 +3511,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1700 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.37 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3524,11 +3524,11 @@ add x0, x27, 1 # CHECK-NEXT: [0,2] D=eeeeER. ldrsw x1, [x27], #254 # CHECK-NEXT: [0,3] D==eE--R. add x0, x27, #1 # CHECK-NEXT: [0,4] D==eeeeER ldrsw x1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eE--R add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeE-R st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,7] D====eE-R add x0, x27, #1 +# CHECK-NEXT: [0,5] .D==eE--R add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeE-R st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,7] .D===eE-R add x0, x27, #1 # CHECK-NEXT: [0,8] .D===eeER st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3542,12 +3542,12 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 7. 1 5.0 0.0 1.0 add x0, x27, #1 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 1.0 st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: 7. 1 4.0 0.0 1.0 add x0, x27, #1 # CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.1 0.8 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.9 0.1 0.8 # CHECK: [52] Code Region - G53 @@ -3556,7 +3556,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3568,12 +3568,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h }, [x27], #8 # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER. st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8b }, [x27], #8 # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3586,13 +3586,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8 # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [53] Code Region - G54 @@ -3601,7 +3601,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3613,12 +3613,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d }, [x27], x28 # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER. st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2s }, [x27], x28 # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3631,13 +3631,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28 # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [54] Code Region - G55 @@ -3646,7 +3646,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -3658,12 +3658,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . st1 { v1.8b }, [x27], x28 # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER. st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.16b }, [x27], x28 # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3676,13 +3676,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28 # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [55] Code Region - G56 @@ -3691,7 +3691,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.76 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 @@ -3701,14 +3701,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeER . 
st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER. st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeER st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3719,15 +3719,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.1 0.1 0.0 # CHECK: [56] Code Region - G57 @@ -3736,7 +3736,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.16 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 @@ -3746,14 +3746,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . 
st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3764,15 +3764,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 1.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [57] Code Region - G58 @@ -3781,7 +3781,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.16 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 @@ -3791,14 +3791,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER. st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeER st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,9] . D=eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3809,15 +3809,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 
1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 0.0 # CHECK: [58] Code Region - G59 @@ -3826,7 +3826,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.84 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 @@ -3836,14 +3836,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,7] . D==eER. 
add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3854,15 +3854,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.2 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.9 0.2 0.0 # CHECK: [59] Code Region - G60 @@ -3871,7 +3871,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.12 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 @@ -3881,14 +3881,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . 
st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,7] .D====eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3899,15 +3899,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.2 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 
1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.9 0.2 0.0 # CHECK: [60] Code Region - G61 @@ -3916,7 +3916,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.84 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 @@ -3926,14 +3926,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3944,15 +3944,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 
1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.2 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.2 0.0 # CHECK: [61] Code Region - G62 @@ -3961,7 +3961,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 704 # CHECK-NEXT: Total uOps: 3600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.11 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 @@ -3972,14 +3972,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,7] . D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,5] . D=eER . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,7] . D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3990,15 +3990,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.4 0.3 0.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.2 0.3 0.0 # CHECK: [62] Code Region - G63 @@ -4007,7 +4007,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 4200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.22 # CHECK-NEXT: IPC: 1.24 # CHECK-NEXT: Block RThroughput: 8.0 @@ -4017,15 +4017,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. .. 
st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] D=eER. .. add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] .DeER. .. add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 # CHECK-NEXT: [0,3] .D=eER .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,5] .D===eER .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,7] . D===eER .. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eER add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,5] . D=eER .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,7] . DeER .. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4035,16 +4035,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 # CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.4 0.0 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.8 0.4 0.0 # CHECK: [63] Code Region - G64 @@ -4053,7 +4053,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.41 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 7.0 @@ -4062,15 +4062,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,1] .DeER. . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 # CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] .D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . 
D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4080,16 +4080,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 # CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.2 0.0 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.9 0.2 0.0 # CHECK: [64] Code Region - G65 @@ -4098,7 +4098,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.53 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 5.5 @@ -4108,15 +4108,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eER. . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eER . . 
add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeeeER . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,5] .D====eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D===eeeeER. st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,7] . D====eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeER st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeER. . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeER . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeER . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,5] . D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4126,16 +4126,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 5. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.5 0.3 0.6 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 7. 
1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.1 0.3 0.6 # CHECK: [65] Code Region - G66 @@ -4144,7 +4144,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.95 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -4157,12 +4157,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.h }[0], [x27], #2 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D=eeeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st1 { v1.h }[0], [x27], x28 # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4175,13 +4175,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 9. 
1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 1.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.0 # CHECK: [66] Code Region - G67 @@ -4190,7 +4190,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.35 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.0 @@ -4203,12 +4203,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . st1 { v1.s }[0], [x27], x28 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D=eeeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st1 { v1.d }[0], [x27], x28 # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4221,13 +4221,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 1.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.0 # CHECK: [67] Code Region - G68 @@ -4236,7 +4236,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.74 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 @@ -4249,12 +4249,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.4h, v2.4h }, [x27], #16 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeER st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4267,13 +4267,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 
1 4.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 1.0 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 1.0 # CHECK: [68] Code Region - G69 @@ -4282,7 +4282,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.14 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 4.0 @@ -4293,14 +4293,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . st2 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeER st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,9] . 
D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4311,15 +4311,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.9 0.1 1.0 # CHECK: [69] Code Region - G70 @@ -4328,7 +4328,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.74 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 @@ -4339,14 +4339,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . st2 { v1.8b, v2.8b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. 
st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeER st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4357,15 +4357,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.0 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 9. 
1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.1 1.0 # CHECK: [70] Code Region - G71 @@ -4374,7 +4374,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.95 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -4387,12 +4387,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4405,13 +4405,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 # CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 9. 
1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 1.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.0 # CHECK: [71] Code Region - G72 @@ -4420,7 +4420,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.95 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -4433,12 +4433,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 # CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,5] D===eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,4] .D=eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 # CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeeeER st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,9] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeER st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4451,13 +4451,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 # CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 # CHECK-NEXT: 7. 
1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 1.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 1.0 # CHECK: [72] Code Region - G73 @@ -4466,7 +4466,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 406 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.93 # CHECK-NEXT: IPC: 1.48 # CHECK-NEXT: Block RThroughput: 3.5 @@ -4476,10 +4476,10 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 # CHECK-NEXT: [0,1] D=eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,5] .D===eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,5] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4490,11 +4490,11 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.5 0.3 1.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 5. 
1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.8 0.3 1.2 # CHECK: [73] Code Region - G74 @@ -4503,7 +4503,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 707 # CHECK-NEXT: Total uOps: 3800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.37 # CHECK-NEXT: IPC: 1.41 # CHECK-NEXT: Block RThroughput: 7.0 @@ -4514,14 +4514,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 # CHECK-NEXT: [0,1] D=eE---R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,5] .D===eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,7] . D===eE---R . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeeER . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,5] . D==eE---R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeER . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,7] . D==eE---R . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,9] . D===eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4532,15 +4532,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 5. 
1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 7. 1 4.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.4 0.3 1.4 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 7. 1 3.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 9. 1 4.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.3 1.4 # CHECK: [74] Code Region - G75 @@ -4549,7 +4549,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.82 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.0 @@ -4560,14 +4560,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,5] .D==eE---R. . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,7] .D===eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,9] . D====eE---R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeER. . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,5] . D=eE---R. . 
add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE--R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,9] . D==eE---R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4578,15 +4578,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.2 1.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 3.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.2 1.2 # CHECK: [75] Code Region - G76 @@ -4595,7 +4595,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 606 # CHECK-NEXT: Total uOps: 3200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.28 # CHECK-NEXT: IPC: 1.65 # CHECK-NEXT: Block RThroughput: 5.5 @@ -4606,14 +4606,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeER .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: [0,1] D=eE---R .. 
add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,3] D==eE--R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,5] .D===eE--R.. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D====eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: [0,9] . D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,3] .D=eE--R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeER.. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,5] . D==eE--R.. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D=eeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,9] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4624,15 +4624,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 3.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.3 0.2 1.1 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 5. 
1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.1 0.2 1.1 # CHECK: [76] Code Region - G77 @@ -4641,7 +4641,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 3000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.93 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 5.0 @@ -4652,14 +4652,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 # CHECK-NEXT: [0,1] D=eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,3] D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D==eE--R . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,7] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: [0,9] . D===eE--R add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,3] .D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D=eE--R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,7] . D=eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,9] . D=eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4670,15 +4670,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 
1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 7. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 9. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.7 0.1 1.0 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 9. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 1.0 # CHECK: [77] Code Region - G78 @@ -4687,25 +4687,25 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.10 # CHECK-NEXT: IPC: 1.42 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,3] D==eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,5] .D==eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,7] . D==eE--R. . add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,9] . 
D===eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeER . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] .DeeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,3] .D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeER . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,5] . D=eE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeER. . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,7] . DeE--R. . add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4716,15 +4716,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 3. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.6 0.1 1.2 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 3. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 5. 1 2.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 7. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 9. 
1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.4 0.2 1.2 # CHECK: [78] Code Region - G79 @@ -4733,7 +4733,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1205 # CHECK-NEXT: Total uOps: 5800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.81 # CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 12.0 @@ -4745,13 +4745,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 # CHECK-NEXT: [0,1] D=eE----R . .. add x0, x27, #1 # CHECK-NEXT: [0,2] .DeeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] .D=eE-----R .. add x0, x27, #1 -# CHECK-NEXT: [0,4] . D=eeeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,5] . D==eE----R .. add x0, x27, #1 -# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,9] . D====eE-----R add x0, x27, #1 +# CHECK-NEXT: [0,3] . DeE-----R .. add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER .. st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,5] . D=eE----R .. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D==eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,7] . D==eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . .D=eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,9] . . D=eE-----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4763,14 +4763,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 9. 1 5.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.4 2.3 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 2.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 7. 1 3.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 9. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.8 0.4 2.3 # CHECK: [79] Code Region - G80 @@ -4779,7 +4779,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 1006 # CHECK-NEXT: Total uOps: 4800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.77 # CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 9.5 @@ -4789,15 +4789,15 @@ add x0, x27, 1 # CHECK-NEXT: Index 0123456789 # CHECK: [0,0] DeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE--R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeER. . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D===eeeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,5] . D===eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D===eeeeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,7] . D====eE-----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D====eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,9] . D=====eE----R add x0, x27, #1 +# CHECK-NEXT: [0,1] .DeE--R . . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeER . 
st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeeeER . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,5] . D==eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeeER. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,7] . D=eE-----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . .D=eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,9] . .D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4807,43 +4807,43 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 5.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 9. 1 6.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.4 0.4 1.9 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 7. 1 2.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 1.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 9. 
1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.9 0.4 1.9 # CHECK: [80] Code Region - G81 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 807 +# CHECK-NEXT: Total Cycles: 808 # CHECK-NEXT: Total uOps: 5200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 6.44 # CHECK-NEXT: IPC: 1.24 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D=eE-----R. . add x0, x27, #1 -# CHECK-NEXT: [0,2] .DeeeeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=eE-----R . add x0, x27, #1 -# CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D===eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,7] . D===eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D===eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: [0,9] . D====eE----R add x0, x27, #1 +# CHECK: [0,0] DeeeeeeeER. . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeE-----R. . add x0, x27, #1 +# CHECK-NEXT: [0,2] . DeeeeeeeER . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeE-----R . add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . .D=eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,9] . 
.D==eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4853,16 +4853,16 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 2.0 0.0 5.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 1. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 2. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 5.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 # CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 2.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.6 0.3 2.2 +# CHECK-NEXT: 6. 1 2.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 2.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.7 0.4 2.2 # CHECK: [81] Code Region - G82 @@ -4871,7 +4871,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 4000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 7.87 # CHECK-NEXT: IPC: 1.97 # CHECK-NEXT: Block RThroughput: 5.0 @@ -4882,14 +4882,14 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 # CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,2] .DeeeeeeER. . 
st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 # CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1 -# CHECK-NEXT: [0,4] .D=eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,6] . D=eeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,8] . D==eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1 +# CHECK-NEXT: [0,4] . DeeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1 +# CHECK-NEXT: [0,6] . DeeeeeeER. st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1 +# CHECK-NEXT: [0,8] . DeeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4900,15 +4900,15 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 3.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.3 0.1 2.0 +# CHECK-NEXT: 4. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 7. 
1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 1.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1 +# CHECK-NEXT: 1 1.5 0.1 2.0 # CHECK: [82] Code Region - G83 @@ -4917,10 +4917,10 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.53 # CHECK-NEXT: IPC: 1.58 -# CHECK-NEXT: Block RThroughput: 2.0 +# CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: # CHECK-NEXT: 0 @@ -4928,12 +4928,12 @@ add x0, x27, 1 # CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 # CHECK-NEXT: [0,1] D=eE----R . add x0, x27, #1 -# CHECK-NEXT: [0,2] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,2] .DeeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 # CHECK-NEXT: [0,3] .D=eE----R. add x0, x27, #1 -# CHECK-NEXT: [0,4] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,5] .D===eE--R. add x0, x27, #1 -# CHECK-NEXT: [0,6] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,7] .D====eE--R add x0, x27, #1 +# CHECK-NEXT: [0,4] . D=eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,5] . D==eE--R. add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,7] . D==eE--R add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -4944,13 +4944,13 @@ add x0, x27, 1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 # CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 # CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1 -# CHECK-NEXT: 4. 
1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 5. 1 4.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 7. 1 5.0 0.0 2.0 add x0, x27, #1 -# CHECK-NEXT: 1 2.9 0.3 1.5 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 5. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 7. 1 3.0 0.0 2.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.0 0.3 1.5 # CHECK: [83] Code Region - G84 @@ -4959,7 +4959,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 204 # CHECK-NEXT: Total uOps: 800 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.92 # CHECK-NEXT: IPC: 1.96 # CHECK-NEXT: Block RThroughput: 1.0 @@ -4992,7 +4992,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 4.37 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 3.5 @@ -5003,13 +5003,13 @@ add x0, x27, 1 # CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . stp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . stp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] .D==eeER. stp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eER. stp w1, w2, [x27], #248 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,3] .D=eER . add x0, x27, #1 +# CHECK-NEXT: [0,4] .D=eeER . stp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] . D=eeER. stp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,7] . D==eER. add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eER. stp w1, w2, [x27], #248 +# CHECK-NEXT: [0,9] . 
D==eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5021,14 +5021,14 @@ add x0, x27, 1 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp s1, s2, [x27, #248]! -# CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]! -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 3.0 0.0 0.0 stp q1, q2, [x27, #992]! -# CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 stp w1, w2, [x27], #248 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.1 0.1 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stp d1, d2, [x27, #496]! +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 stp q1, q2, [x27, #992]! +# CHECK-NEXT: 7. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 stp w1, w2, [x27], #248 +# CHECK-NEXT: 9. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.3 0.1 0.0 # CHECK: [85] Code Region - G86 @@ -5037,7 +5037,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -5049,12 +5049,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eER. . stp w1, w2, [x27, #248]! # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eER . stp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER. str b1, [x27], #254 +# CHECK-NEXT: [0,4] .D=eER . stp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. str b1, [x27], #254 # CHECK-NEXT: [0,7] .D===eER. 
add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER str h1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER str h1, [x27], #254 +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5067,13 +5067,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]! # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]! -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27], #254 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 stp x1, x2, [x27, #496]! +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27], #254 # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27], #254 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [86] Code Region - G87 @@ -5082,7 +5082,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -5094,12 +5094,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . str d1, [x27], #254 # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . str q1, [x27], #254 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eeER. str b1, [x27, #254]! +# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27], #254 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eeER. str b1, [x27, #254]! # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eeER str h1, [x27, #254]! 
-# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eeER str h1, [x27, #254]! +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5112,13 +5112,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27], #254 # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str b1, [x27, #254]! +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27], #254 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str b1, [x27, #254]! # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str h1, [x27, #254]! -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str h1, [x27, #254]! +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [87] Code Region - G88 @@ -5127,7 +5127,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -5139,12 +5139,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eeER . str d1, [x27, #254]! # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eeER . str q1, [x27, #254]! -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eER . str w1, [x27], #254 +# CHECK-NEXT: [0,4] .D=eeER . str q1, [x27, #254]! +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eER . str w1, [x27], #254 # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eER. str x1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eER. 
str x1, [x27], #254 +# CHECK-NEXT: [0,9] . D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5157,13 +5157,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str d1, [x27, #254]! # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str q1, [x27, #254]! -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 str w1, [x27], #254 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 str q1, [x27, #254]! +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 str w1, [x27], #254 # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 str x1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 str x1, [x27], #254 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [88] Code Region - G89 @@ -5172,7 +5172,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2000 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.97 # CHECK-NEXT: IPC: 1.98 # CHECK-NEXT: Block RThroughput: 2.5 @@ -5184,12 +5184,12 @@ add x0, x27, 1 # CHECK-NEXT: [0,1] D=eER. . add x0, x27, #1 # CHECK-NEXT: [0,2] D=eER. . str x1, [x27, #254]! # CHECK-NEXT: [0,3] D==eER . add x0, x27, #1 -# CHECK-NEXT: [0,4] D==eER . strb w1, [x27], #254 -# CHECK-NEXT: [0,5] D===eER . add x0, x27, #1 -# CHECK-NEXT: [0,6] D===eER . strb w1, [x27, #254]! +# CHECK-NEXT: [0,4] .D=eER . strb w1, [x27], #254 +# CHECK-NEXT: [0,5] .D==eER . add x0, x27, #1 +# CHECK-NEXT: [0,6] .D==eER . strb w1, [x27, #254]! # CHECK-NEXT: [0,7] .D===eER. add x0, x27, #1 -# CHECK-NEXT: [0,8] .D===eER. strh w1, [x27], #254 -# CHECK-NEXT: [0,9] .D====eER add x0, x27, #1 +# CHECK-NEXT: [0,8] . D==eER. strh w1, [x27], #254 +# CHECK-NEXT: [0,9] . 
D===eER add x0, x27, #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -5202,13 +5202,13 @@ add x0, x27, 1 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 add x0, x27, #1 # CHECK-NEXT: 2. 1 2.0 0.0 0.0 str x1, [x27, #254]! # CHECK-NEXT: 3. 1 3.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 4. 1 3.0 0.0 0.0 strb w1, [x27], #254 -# CHECK-NEXT: 5. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 6. 1 4.0 0.0 0.0 strb w1, [x27, #254]! +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 strb w1, [x27], #254 +# CHECK-NEXT: 5. 1 3.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 strb w1, [x27, #254]! # CHECK-NEXT: 7. 1 4.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 8. 1 4.0 0.0 0.0 strh w1, [x27], #254 -# CHECK-NEXT: 9. 1 5.0 0.0 0.0 add x0, x27, #1 -# CHECK-NEXT: 1 3.2 0.1 0.0 +# CHECK-NEXT: 8. 1 3.0 0.0 0.0 strh w1, [x27], #254 +# CHECK-NEXT: 9. 1 4.0 0.0 0.0 add x0, x27, #1 +# CHECK-NEXT: 1 2.7 0.1 0.0 # CHECK: [89] Code Region - G90 @@ -5217,7 +5217,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 104 # CHECK-NEXT: Total uOps: 400 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.85 # CHECK-NEXT: IPC: 1.92 # CHECK-NEXT: Block RThroughput: 0.5 @@ -5246,7 +5246,7 @@ add x0, x27, 1 # CHECK-NEXT: Total Cycles: 110 # CHECK-NEXT: Total uOps: 600 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 5.45 # CHECK-NEXT: IPC: 3.64 # CHECK-NEXT: Block RThroughput: 1.0 @@ -5272,3 +5272,4 @@ add x0, x27, 1 # CHECK-NEXT: 2. 1 5.0 0.0 0.0 ldr x2, [x1], #254 # CHECK-NEXT: 3. 
1 2.0 0.0 6.0 add x0, x27, #1 # CHECK-NEXT: 1 2.5 0.3 2.0 + diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s index 8b1c8a4e4ca55..3954cbd8c5490 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s @@ -9,7 +9,7 @@ cmp x0, #4 # CHECK-NEXT: Total Cycles: 54 # CHECK-NEXT: Total uOps: 200 -# CHECK: Dispatch Width: 15 +# CHECK: Dispatch Width: 8 # CHECK-NEXT: uOps Per Cycle: 3.70 # CHECK-NEXT: IPC: 3.70 # CHECK-NEXT: Block RThroughput: 0.5