From dc6667082d2bc00e4702f1a200956e046132fdb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Mon, 23 Jun 2025 12:45:24 +0000 Subject: [PATCH 1/6] Precommit CostModel tests for 128+ bits extract shuffles --- .../CostModel/AArch64/shuffle-extract.ll | 192 +++++++++++++++++- 1 file changed, 187 insertions(+), 5 deletions(-) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll index 867c35ab79446..bf57e1ca7a605 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -define void @extract_half() { -; CHECK-LABEL: 'extract_half' +define void @extract_half_vscale_any() { +; CHECK-LABEL: 'extract_half_vscale_any' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i8_lo = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i8_hi = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <2 x i32> @@ -16,6 +16,9 @@ define void @extract_half() { ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 
0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <2 x i32> @@ -59,6 +62,9 @@ define void @extract_half() { %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> @@ -96,8 +102,129 @@ define void @extract_half() { ret void } -define void @extract_qtr() { -; CHECK-LABEL: 'extract_qtr' +; Similar as above, but for a 256-bit SVE core. +; Vectors smaller than 256-bit aren't tested again. 
+define void @extract_half_vscale2() vscale_range(2,2) { +; CHECK-LABEL: 'extract_half_vscale2' +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> 
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> 
poison, <32 x i32> + %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + + %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + + %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + + %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + + ret void +} + +; Similar as above, but for a 512-bit SVE core. 
+define void @extract_half_vscale4() vscale_range(4,4) { +; CHECK-LABEL: 'extract_half_vscale4' +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> 
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> 
poison, <32 x i32> + %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + + %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + + %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + + %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + + ret void +} + +define void @extract_qtr_vscale_any() { +; CHECK-LABEL: 'extract_qtr_vscale_any' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i8_mi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i8_hi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x 
i32> @@ -107,6 +234,9 @@ define void @extract_qtr() { ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i16_hi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> @@ -142,6 +272,9 @@ define void @extract_qtr() { %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> @@ -172,3 +305,52 @@ define void @extract_qtr() { ret void } + +; Similar as above, but for a 256-bit SVE core. +; Vectors smaller than 256-bit aren't tested again. 
+define void @extract_qtr_vscale2() vscale_range(2,2) { +; CHECK-LABEL: 'extract_qtr_vscale2' +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: 
Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + + %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + + %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + + %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v8i64_lo = shufflevector <8 x i64> poison, <8 
x i64> poison, <2 x i32> + %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> + %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> + + ret void +} From b0203919ab84ef54b80c1350ca00110bb0e7cf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Tue, 26 Aug 2025 09:13:52 +0000 Subject: [PATCH 2/6] [AArch64] Update cost model for extracting halves from 128+ bit vectors Previously, only 128-bit "NEON" vectors were given sensible costs. Cores with vscale>1 can use SVE's EXT instruction to perform a fixed-length subvector extract. --- .../AArch64/AArch64TargetTransformInfo.cpp | 10 ++++--- .../CostModel/AArch64/shuffle-extract.ll | 28 +++++++++---------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 490f6391c15a0..120f29a1aa22c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5750,11 +5750,13 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp); bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector; - // A subvector extract can be implemented with an ext (or trivial extract, if - // from lane 0). This currently only handles low or high extracts to prevent - // SLP vectorizer regressions. + // A subvector extract can be implemented with a NEON/SVE ext (or trivial + // extract, if from lane 0). This currently only handles low or high extracts + // to prevent SLP vectorizer regressions. + // Note that SVE's ext instruciton is destructive, but it can be fused with + // a movprfx to act like a constructive instruction. 
if (IsExtractSubvector && LT.second.isFixedLengthVector()) { - if (LT.second.is128BitVector() && + if (LT.second.getFixedSizeInBits() >= AArch64::SVEBitsPerBlock && cast<FixedVectorType>(SubTp)->getNumElements() == LT.second.getVectorNumElements() / 2) { if (Index == 0) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll index bf57e1ca7a605..8199f26b9c11c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll @@ -108,25 +108,25 @@ define void @extract_half_vscale2() vscale_range(2,2) { ; CHECK-LABEL: 'extract_half_vscale2' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 
CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 
CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> @@ -168,28 +168,28 @@ define void @extract_half_vscale4() vscale_range(4,4) { ; CHECK-LABEL: 'extract_half_vscale4' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x 
i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x 
i32> poison, <16 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> @@ -320,13 +320,13 @@ define void @extract_qtr_vscale2() vscale_range(2,2) { ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 
CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; From 92a30831667f2dd8d529bf36ad32273148225e7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Thu, 28 Aug 2025 08:19:29 +0000 Subject: [PATCH 3/6] Replace SVEBitsPerBlock with plain 128 --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 120f29a1aa22c..0df496735490c 100644 --- 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5751,12 +5751,13 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp); bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector; // A subvector extract can be implemented with a NEON/SVE ext (or trivial - // extract, if from lane 0). This currently only handles low or high extracts - // to prevent SLP vectorizer regressions. + // extract, if from lane 0) for 128-bit NEON vectors or legal SVE vectors. + // This currently only handles low or high extracts to prevent SLP vectorizer + // regressions. // Note that SVE's ext instruciton is destructive, but it can be fused with // a movprfx to act like a constructive instruction. if (IsExtractSubvector && LT.second.isFixedLengthVector()) { - if (LT.second.getFixedSizeInBits() >= AArch64::SVEBitsPerBlock && + if (LT.second.getFixedSizeInBits() >= 128 && cast<FixedVectorType>(SubTp)->getNumElements() == LT.second.getVectorNumElements() / 2) { if (Index == 0) From 8d57a656271ea982687c75a410bada16c61231ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Mon, 1 Sep 2025 13:42:42 +0000 Subject: [PATCH 4/6] Restore NEON shuffle-extract.ll test I'll create a new one for VLS SVE. 
--- .../CostModel/AArch64/shuffle-extract.ll | 192 +----------------- 1 file changed, 5 insertions(+), 187 deletions(-) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll index 8199f26b9c11c..867c35ab79446 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" -cost-kind=all 2>&1 -disable-output | FileCheck %s +; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print" -cost-kind=all 2>&1 -disable-output | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -define void @extract_half_vscale_any() { -; CHECK-LABEL: 'extract_half_vscale_any' +define void @extract_half() { +; CHECK-LABEL: 'extract_half' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i8_lo = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i8_hi = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <2 x i32> @@ -16,9 +16,6 @@ define void @extract_half_vscale_any() { ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; 
CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <2 x i32> @@ -62,9 +59,6 @@ define void @extract_half_vscale_any() { %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> - %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> @@ -102,129 +96,8 @@ define void @extract_half_vscale_any() { ret void } -; Similar as above, but for a 256-bit SVE core. -; Vectors smaller than 256-bit aren't tested again. 
-define void @extract_half_vscale2() vscale_range(2,2) { -; CHECK-LABEL: 'extract_half_vscale2' -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 
SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; - %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> - %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> - %v64i8_hi = shufflevector <64 x i8> 
poison, <64 x i8> poison, <32 x i32> - - %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> - %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> - %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> - %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - - %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - - %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - - ret void -} - -; Similar as above, but for a 512-bit SVE core. 
-define void @extract_half_vscale4() vscale_range(4,4) { -; CHECK-LABEL: 'extract_half_vscale4' -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x 
i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; - %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> - %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> - %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> - %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> - - %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> - %v16i16_mi = shufflevector <16 x i16> 
poison, <16 x i16> poison, <8 x i32> - %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> - %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> - - %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> - %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> - - %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> - %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> - - ret void -} - -define void @extract_qtr_vscale_any() { -; CHECK-LABEL: 'extract_qtr_vscale_any' +define void @extract_qtr() { +; CHECK-LABEL: 'extract_qtr' ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i8_mi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i8_hi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> @@ -234,9 +107,6 @@ define void @extract_qtr_vscale_any() { ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 
for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v4i16_hi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> @@ -272,9 +142,6 @@ define void @extract_qtr_vscale_any() { %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> - %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> - %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> - %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> @@ -305,52 +172,3 @@ define void @extract_qtr_vscale_any() { ret void } - -; Similar as above, but for a 256-bit SVE core. -; Vectors smaller than 256-bit aren't tested again. 
-define void @extract_qtr_vscale2() vscale_range(2,2) { -; CHECK-LABEL: 'extract_qtr_vscale2' -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of RThru:4 
CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> -; CHECK-NEXT: Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; - %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> - %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> - %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> - - %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> - %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> - %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> - - %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> - %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> - %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> - %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> - %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> - %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> - - %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> - %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> - %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> - %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> - %v8i64_mi = shufflevector <8 x i64> 
poison, <8 x i64> poison, <2 x i32> - %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> - - ret void -} From 8d3248507920f1aedb5b75f1590be9e434997203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Mon, 1 Sep 2025 13:56:53 +0000 Subject: [PATCH 5/6] Add SVE-specific cost test for VLS --- .../CostModel/AArch64/shuffle-extract.ll | 3 + .../AArch64/sve-vls-shuffle-extract.ll | 339 ++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll index 867c35ab79446..07764fbf4acf3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print" -cost-kind=all 2>&1 -disable-output | FileCheck %s +; This tests the cost of fixed-length subvector extracts for NEON. 
+; For the SVE equivalent test, see sve-vls-shuffle-extract.ll + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @extract_half() { diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll new file mode 100644 index 0000000000000..65261a8b2c6d6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll @@ -0,0 +1,339 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=128 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-1 +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-1 +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=256 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-2 +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-2 +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=512 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-4 +; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print" 2>&1 -disable-output \ +; RUN: -aarch64-sve-vector-bits-min=512 -aarch64-sve-vector-bits-max=512 \ +; RUN: | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-4 + +; This tests the cost of fixed-length subvector extracts for SVE, +; either for a minimum vscale or a fixed vscale (aka VLS). 
+; For the NEON equivalent test, see shuffle-extract.ll + +define void @extract_half_lo() { +; VSCALE-ANY-LABEL: 'extract_half_lo' +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_lo = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> zeroinitializer +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void +; + %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + + %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> + %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + + %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> + %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + + %v2i64_lo = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> + %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + + ret void +} + +define void @extract_half_hi() { +; VSCALE-1-LABEL: 'extract_half_hi' +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-2-LABEL: 'extract_half_hi' +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> 
poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-4-LABEL: 'extract_half_hi' +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + + %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> + %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + + %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> + %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + + %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> + %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + + ret void +} + +define void @extract_half_unaligned() { +; VSCALE-1-LABEL: 'extract_half_unaligned' +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-2-LABEL: 'extract_half_unaligned' +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> 
poison, <32 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-4-LABEL: 'extract_half_unaligned' +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated 
cost of 32 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> + %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> + + %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> + %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> + %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> + + %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> + %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> + %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> + + %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> + %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> + + ret void +} + +define void @extract_qtr_lo() { +; VSCALE-ANY-LABEL: 
'extract_qtr_lo' +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> zeroinitializer +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-ANY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> + %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v64i8_lo = shufflevector <64 x i8> poison, <64 x 
i8> poison, <16 x i32> + + %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> + %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> + + %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> + %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + + %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> + + ret void +} + +define void @extract_qtr_hi() { +; VSCALE-1-LABEL: 'extract_qtr_hi' +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_hi 
= shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-2-LABEL: 'extract_qtr_hi' +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-2-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-4-LABEL: 'extract_qtr_hi' +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v16i8_hi = 
shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> + %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> + + %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> + %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> + + %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> + %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + + %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> + + ret void +} + +define void @extract_qtr_unaligned() { +; VSCALE-1-LABEL: 'extract_qtr_unaligned' +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-1-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-2-LABEL: 'extract_qtr_unaligned' +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_mi = shufflevector <16 x i32> 
poison, <16 x i32> poison, <4 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; VSCALE-4-LABEL: 'extract_qtr_unaligned' +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> +; VSCALE-4-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> +; VSCALE-4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> + %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> + %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> + + %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> + %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> + %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> + + %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> + %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> + %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> + + %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> + %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> + + ret void +} From e0d3f2d9a5053663b2bd9fbc2e59c7a696f7cd76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Mon, 1 Sep 2025 16:31:23 +0000 Subject: [PATCH 6/6] Fix typo --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0df496735490c..29bfd9eee785e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5754,7 +5754,7 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, // extract, if from lane 0) for 128-bit NEON vectors or legal SVE vectors. // This currently only handles low or high extracts to prevent SLP vectorizer // regressions. 
- // Note that SVE's ext instruciton is destructive, but it can be fused with + // Note that SVE's ext instruction is destructive, but it can be fused with // a movprfx to act like a constructive instruction. if (IsExtractSubvector && LT.second.isFixedLengthVector()) { if (LT.second.getFixedSizeInBits() >= 128 &&