50 changes: 50 additions & 0 deletions clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify

#include <immintrin.h>
// i386 negative test: the _mm512_loadrs_epi8 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
return _mm512_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_mask_loadrs_epi8 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
return _mm512_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_maskz_loadrs_epi8 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
return _mm512_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_loadrs_epi32 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
return _mm512_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_mask_loadrs_epi32 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
return _mm512_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_maskz_loadrs_epi32 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
return _mm512_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_loadrs_epi64 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
return _mm512_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_mask_loadrs_epi64 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
return _mm512_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_maskz_loadrs_epi64 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
return _mm512_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_loadrs_epi16 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
return _mm512_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_mask_loadrs_epi16 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
return _mm512_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}

// i386 negative test: the _mm512_maskz_loadrs_epi16 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
return _mm512_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
}
87 changes: 87 additions & 0 deletions clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsb512
// intrinsic with a <64 x i8> result.
__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
// CHECK-LABEL: @test_mm512_loadrs_epi8(
// CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
return _mm512_loadrs_epi8(__A);
}

// mask form: the vmovrsb512 call must be followed by a <64 x i1> select
// over <64 x i8> operands.
__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
// CHECK-LABEL: @test_mm512_mask_loadrs_epi8(
// CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_loadrs_epi8(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsb512 call, a store of
// an all-zero <8 x i64>, and a <64 x i1> select over <64 x i8> operands.
__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
// CHECK-LABEL: @test_mm512_maskz_loadrs_epi8(
// CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
// CHECK: store <8 x i64> zeroinitializer
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_loadrs_epi8(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsd512
// intrinsic with a <16 x i32> result.
__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
// CHECK-LABEL: @test_mm512_loadrs_epi32(
// CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
return _mm512_loadrs_epi32(__A);
}

// mask form: the vmovrsd512 call must be followed by a <16 x i1> select
// over <16 x i32> operands.
__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
// CHECK-LABEL: @test_mm512_mask_loadrs_epi32(
// CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_loadrs_epi32(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsd512 call, a store of
// an all-zero <8 x i64>, and a <16 x i1> select over <16 x i32> operands.
__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
// CHECK-LABEL: @test_mm512_maskz_loadrs_epi32(
// CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
// CHECK: store <8 x i64> zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_loadrs_epi32(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsq512
// intrinsic with an <8 x i64> result.
__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
// CHECK-LABEL: @test_mm512_loadrs_epi64(
// CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
return _mm512_loadrs_epi64(__A);
}

// mask form: the vmovrsq512 call must be followed by an <8 x i1> select
// over <8 x i64> operands.
__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
// CHECK-LABEL: @test_mm512_mask_loadrs_epi64(
// CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_loadrs_epi64(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsq512 call, a store of
// an all-zero <8 x i64>, and an <8 x i1> select over <8 x i64> operands.
__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
// CHECK-LABEL: @test_mm512_maskz_loadrs_epi64(
// CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
// CHECK: store <8 x i64> zeroinitializer
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_loadrs_epi64(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsw512
// intrinsic with a <32 x i16> result.
__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
// CHECK-LABEL: @test_mm512_loadrs_epi16(
// CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
return _mm512_loadrs_epi16(__A);
}

// mask form: the vmovrsw512 call must be followed by a <32 x i1> select
// over <32 x i16> operands.
__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
// CHECK-LABEL: @test_mm512_mask_loadrs_epi16(
// CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_loadrs_epi16(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsw512 call, a store of
// an all-zero <8 x i64>, and a <32 x i1> select over <32 x i16> operands.
__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
// CHECK-LABEL: @test_mm512_maskz_loadrs_epi16(
// CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
// CHECK: store <8 x i64> zeroinitializer
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_loadrs_epi16(__A, __B);
}
98 changes: 98 additions & 0 deletions clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify

#include <immintrin.h>
// i386 negative test: the _mm_loadrs_epi8 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_loadrs_epi8(const __m128i * __A) {
return _mm_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_mask_loadrs_epi8 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
return _mm_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_maskz_loadrs_epi8 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
return _mm_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm256_loadrs_epi8 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
return _mm256_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_mask_loadrs_epi8 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
return _mm256_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_maskz_loadrs_epi8 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
return _mm256_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm_loadrs_epi32 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_loadrs_epi32(const __m128i * __A) {
return _mm_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_mask_loadrs_epi32 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
return _mm_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_maskz_loadrs_epi32 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
return _mm_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm256_loadrs_epi32 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
return _mm256_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_mask_loadrs_epi32 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
return _mm256_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_maskz_loadrs_epi32 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
return _mm256_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm_loadrs_epi64 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_loadrs_epi64(const __m128i * __A) {
return _mm_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_mask_loadrs_epi64 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
return _mm_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_maskz_loadrs_epi64 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
return _mm_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm256_loadrs_epi64 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
return _mm256_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_mask_loadrs_epi64 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
return _mm256_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_maskz_loadrs_epi64 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
return _mm256_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm_loadrs_epi16 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_loadrs_epi16(const __m128i * __A) {
return _mm_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_mask_loadrs_epi16 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
return _mm_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm_maskz_loadrs_epi16 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
return _mm_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
}

// i386 negative test: the _mm256_loadrs_epi16 call is typed 'int' (presumably the
// intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
return _mm256_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_mask_loadrs_epi16 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
return _mm256_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}

// i386 negative test: the _mm256_maskz_loadrs_epi16 call is typed 'int' (presumably
// the intrinsic is undeclared on 32-bit targets), so the return must be diagnosed.
__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
return _mm256_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
}
171 changes: 171 additions & 0 deletions clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsb128
// intrinsic with a <16 x i8> result.
__m128i test_mm_loadrs_epi8(const __m128i * __A) {
// CHECK-LABEL: @test_mm_loadrs_epi8(
// CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
return _mm_loadrs_epi8(__A);
}

// mask form: the vmovrsb128 call must be followed by a <16 x i1> select
// over <16 x i8> operands.
__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
// CHECK-LABEL: @test_mm_mask_loadrs_epi8(
// CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_loadrs_epi8(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsb128 call, a store of
// an all-zero <2 x i64>, and a <16 x i1> select over <16 x i8> operands.
__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
// CHECK-LABEL: @test_mm_maskz_loadrs_epi8(
// CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
// CHECK: store <2 x i64> zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_loadrs_epi8(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsb256
// intrinsic with a <32 x i8> result.
__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
// CHECK-LABEL: @test_mm256_loadrs_epi8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
return _mm256_loadrs_epi8(__A);
}

// mask form: the vmovrsb256 call must be followed by a <32 x i1> select
// over <32 x i8> operands.
__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
// CHECK-LABEL: @test_mm256_mask_loadrs_epi8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_loadrs_epi8(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsb256 call, a store of
// an all-zero <4 x i64>, and a <32 x i1> select over <32 x i8> operands.
__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
// CHECK-LABEL: @test_mm256_maskz_loadrs_epi8(
// CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
// CHECK: store <4 x i64> zeroinitializer
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_loadrs_epi8(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsd128
// intrinsic with a <4 x i32> result.
__m128i test_mm_loadrs_epi32(const __m128i * __A) {
// CHECK-LABEL: @test_mm_loadrs_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
return _mm_loadrs_epi32(__A);
}

// mask form: the vmovrsd128 call must be followed by a <4 x i1> select
// over <4 x i32> operands.
__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
// CHECK-LABEL: @test_mm_mask_loadrs_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_loadrs_epi32(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsd128 call, a store of
// an all-zero <2 x i64>, and a <4 x i1> select over <4 x i32> operands.
__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
// CHECK-LABEL: @test_mm_maskz_loadrs_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
// CHECK: store <2 x i64> zeroinitializer
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_loadrs_epi32(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsd256
// intrinsic with an <8 x i32> result.
__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
// CHECK-LABEL: @test_mm256_loadrs_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
return _mm256_loadrs_epi32(__A);
}

// mask form: the vmovrsd256 call must be followed by an <8 x i1> select
// over <8 x i32> operands.
__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
// CHECK-LABEL: @test_mm256_mask_loadrs_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_loadrs_epi32(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsd256 call, a store of
// an all-zero <4 x i64>, and an <8 x i1> select over <8 x i32> operands.
__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
// CHECK-LABEL: @test_mm256_maskz_loadrs_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
// CHECK: store <4 x i64> zeroinitializer
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_loadrs_epi32(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsq128
// intrinsic with a <2 x i64> result.
__m128i test_mm_loadrs_epi64(const __m128i * __A) {
// CHECK-LABEL: @test_mm_loadrs_epi64(
// CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
return _mm_loadrs_epi64(__A);
}

// mask form: the vmovrsq128 call must be followed by a <2 x i1> select
// over <2 x i64> operands.
__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
// CHECK-LABEL: @test_mm_mask_loadrs_epi64(
// CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_loadrs_epi64(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsq128 call, a store of
// an all-zero <2 x i64>, and a <2 x i1> select over <2 x i64> operands.
__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
// CHECK-LABEL: @test_mm_maskz_loadrs_epi64(
// CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
// CHECK: store <2 x i64> zeroinitializer
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_loadrs_epi64(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsq256
// intrinsic with a <4 x i64> result.
__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
// CHECK-LABEL: @test_mm256_loadrs_epi64(
// CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
return _mm256_loadrs_epi64(__A);
}

// mask form: the vmovrsq256 call must be followed by a <4 x i1> select
// over <4 x i64> operands.
__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
// CHECK-LABEL: @test_mm256_mask_loadrs_epi64(
// CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_loadrs_epi64(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsq256 call, a store of
// an all-zero <4 x i64>, and a <4 x i1> select over <4 x i64> operands.
__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
// CHECK-LABEL: @test_mm256_maskz_loadrs_epi64(
// CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
// CHECK: store <4 x i64> zeroinitializer
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_loadrs_epi64(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsw128
// intrinsic with an <8 x i16> result.
__m128i test_mm_loadrs_epi16(const __m128i * __A) {
// CHECK-LABEL: @test_mm_loadrs_epi16(
// CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
return _mm_loadrs_epi16(__A);
}

// mask form: the vmovrsw128 call must be followed by an <8 x i1> select
// over <8 x i16> operands.
__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
// CHECK-LABEL: @test_mm_mask_loadrs_epi16(
// CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_loadrs_epi16(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsw128 call, a store of
// an all-zero <2 x i64>, and an <8 x i1> select over <8 x i16> operands.
__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
// CHECK-LABEL: @test_mm_maskz_loadrs_epi16(
// CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
// CHECK: store <2 x i64> zeroinitializer
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_loadrs_epi16(__A, __B);
}

// Unmasked form: the emitted IR must call the llvm.x86.avx10.vmovrsw256
// intrinsic with a <16 x i16> result.
__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
// CHECK-LABEL: @test_mm256_loadrs_epi16(
// CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
return _mm256_loadrs_epi16(__A);
}

// mask form: the vmovrsw256 call must be followed by a <16 x i1> select
// over <16 x i16> operands.
__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
// CHECK-LABEL: @test_mm256_mask_loadrs_epi16(
// CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_loadrs_epi16(__A, __B, __C);
}

// maskz form: the IR must contain, in order, the vmovrsw256 call, a store of
// an all-zero <4 x i64>, and a <16 x i1> select over <16 x i16> operands.
__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
// CHECK-LABEL: @test_mm256_maskz_loadrs_epi16(
// CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
// CHECK: store <4 x i64> zeroinitializer
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_loadrs_epi16(__A, __B);
}
1 change: 1 addition & 0 deletions clang/test/CodeGen/target-builtin-noerror.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx10.1-512");
(void)__builtin_cpu_supports("avx10.2-256");
(void)__builtin_cpu_supports("avx10.2-512");
(void)__builtin_cpu_supports("movrs");
}

void verifycpustrings(void) {
Expand Down
5 changes: 5 additions & 0 deletions clang/test/Driver/x86-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,11 @@
// USERMSR: "-target-feature" "+usermsr"
// NO-USERMSR: "-target-feature" "-usermsr"

// RUN: %clang --target=i386 -mmovrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVRS %s
// RUN: %clang --target=i386 -mno-movrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVRS %s
// MOVRS: "-target-feature" "+movrs"
// NO-MOVRS: "-target-feature" "-movrs"

// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Preprocessor/x86_target_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,12 @@
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-usermsr -x c -E -dM -o - %s | FileCheck -check-prefix=NO-USERMSR %s
// NO-USERMSR-NOT: #define __USERMSR__ 1

// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mmovrs -x c -E -dM -o - %s | FileCheck -check-prefix=MOVRS %s
// MOVRS: #define __MOVRS__ 1

// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-movrs -x c -E -dM -o - %s | FileCheck -check-prefix=NO-MOVRS %s
// NO-MOVRS-NOT: #define __MOVRS__ 1

// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s

// CRC32: #define __CRC32__ 1
Expand Down
3 changes: 3 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ enum ProcessorFeatures {
FEATURE_AVX10_1_512,
FEATURE_AVX10_2_256,
FEATURE_AVX10_2_512,
FEATURE_MOVRS,
CPU_FEATURE_MAX
};

Expand Down Expand Up @@ -972,6 +973,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_HRESET);
if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
setFeature(FEATURE_AVXIFMA);
if (HasLeaf7Subleaf1 && ((EAX >> 31) & 1))
setFeature(FEATURE_MOVRS);

if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
setFeature(FEATURE_AVXVNNIINT8);
Expand Down
6 changes: 3 additions & 3 deletions libcxx/include/__charconv/from_chars_floating_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ struct __from_chars_result {

template <class _Fp>
_LIBCPP_EXPORTED_FROM_ABI __from_chars_result<_Fp> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt);
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt);

extern template __from_chars_result<float> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt);
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt);

extern template __from_chars_result<double> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt);
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt);

template <class _Fp>
_LIBCPP_HIDE_FROM_ABI from_chars_result
Expand Down
6 changes: 6 additions & 0 deletions libcxx/include/__config
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,12 @@ typedef __char32_t char32_t;
# define _LIBCPP_LIFETIMEBOUND
# endif

// _LIBCPP_NOESCAPE expands to the [[_Clang::__noescape__]] attribute when the
// compiler reports support for it, and to nothing otherwise, so declarations
// can use it unconditionally.
# if __has_cpp_attribute(_Clang::__noescape__)
# define _LIBCPP_NOESCAPE [[_Clang::__noescape__]]
# else
# define _LIBCPP_NOESCAPE
# endif

# if __has_attribute(__nodebug__)
# define _LIBCPP_NODEBUG __attribute__((__nodebug__))
# else
Expand Down
6 changes: 3 additions & 3 deletions libcxx/src/charconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,13 @@ to_chars_result to_chars(char* __first, char* __last, long double __value, chars

template <class _Fp>
__from_chars_result<_Fp> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt) {
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt) {
return std::__from_chars_floating_point_impl<_Fp>(__first, __last, __fmt);
}

template __from_chars_result<float> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt);
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt);

template __from_chars_result<double> __from_chars_floating_point(
[[clang::noescape]] const char* __first, [[clang::noescape]] const char* __last, chars_format __fmt);
_LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt);
_LIBCPP_END_NAMESPACE_STD
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/archive.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @f() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/cache.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define i32 @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/comdat_ordering1.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

; Generated from this C++ code and simplified manually:
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/comdat_ordering2.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

; Generated from this C++ code and simplified manually:
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/foo.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @foo() local_unnamed_addr {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/libcall-archive.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @memcpy() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/libcall-truncsfhf2.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define half @__truncsfhf2(float) {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/libcall.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

; This function, when compiled will generate a new undefined reference to
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/save-temps.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @bar() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/thin1.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; Copied from lld/test/ELF/lto/Inputs/thin1.ll

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define i32 @foo(i32 %goo) {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/thin2.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; Copied from lld/test/ELF/lto/Inputs/thin2.ll

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define i32 @blah(i32 %meh) #0 {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/thinlto.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @g() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/Inputs/used.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

@foo = hidden global i32 1
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/archive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
; RUN: wasm-ld --export-dynamic %t2.o %t.a -o %t3
; RUN: obj2yaml %t3 | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/atomics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; Atomic operations will not fail to compile if atomics are not
; enabled because LLVM atomics will be lowered to regular ops.

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

@foo = hidden global i32 1
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/cache-warnings.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
; SIZE: warning: ThinLTO cache pruning happens since the total size of{{.*}}--thinlto-cache-policy
; WARN-NOT: warning: ThinLTO cache pruning happens{{.*}}--thinlto-cache-policy

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"

define void @globalfunc() #0 {
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/cache.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
; RUN: wasm-ld --thinlto-cache-dir=%t.cache --thinlto-cache-policy prune_after=0s:cache_size=0%:cache_size_files=1:prune_interval=0s -o %t.wasm %t2.o %t.o
; RUN: ls %t.cache | count 3

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown-wasm"

define void @globalfunc() #0 {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/cgo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
; ERROR-O4: wasm-ld: error: invalid optimization level for LTO: 4
; ERROR-CGO4: wasm-ld: error: invalid codegen optimization level for LTO: 4

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/comdat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
; RUN: wasm-ld %t.bc %t.o -o %t.wasm
; RUN: wasm-ld %t.o %t.bc -o %t.wasm

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

$foo = comdat any
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/diagnostics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: llvm-as %s -o %t.o
; RUN: not wasm-ld --lto-O0 %t.o -o %t2 2>&1 | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/export.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: wasm-ld --export=hidden_function -o %t.wasm %t.bc
; RUN: obj2yaml %t.wasm | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define hidden i32 @hidden_function() local_unnamed_addr {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/import-attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: obj2yaml %t.wasm | FileCheck %s

target triple = "wasm32-unknown-unknown-wasm"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"

define void @_start() {
call void @foo();
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/internalize-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: wasm-ld %t.o -o %t2 -save-temps
; RUN: llvm-dis < %t2.0.2.internalize.bc | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/libcall-archive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
; RUN: wasm-ld -o %t %t.o %t.a
; RUN: obj2yaml %t | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @_start(ptr %a, ptr %b) {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/libcall-truncsfhf2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: llvm-ar rcs %t.a %t.truncsfhf2.o
; RUN: not wasm-ld --export-all %t.o %t.a -o %t.wasm 2>&1 | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

@g_float = global float 0.0
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/lto-start.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Name: _start

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/new-pass-manager.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

; CHECK: Running pass: GlobalOptPass

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @_start() local_unnamed_addr {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/opt-level.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
; RUN: FileCheck --check-prefix=INVALIDNEGATIVE %s
; INVALIDNEGATIVE: invalid optimization level for LTO: 4294967295

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

; CHECK-O0: Name: foo
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/parallel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: llvm-nm %t.lto.o | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-nm %t1.lto.o | FileCheck --check-prefix=CHECK1 %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

; CHECK0-NOT: bar
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/relocatable-undefined.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: wasm-ld -r -o %t.wasm %t.o
; RUN: obj2yaml %t.wasm | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

@missing_data = external global i32
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/relocatable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
; CHECK-NEXT: }
; CHECK-NEXT: ]

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @foo() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/save-temps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; RUN: llvm-nm a.out.lto.o | FileCheck %s
; RUN: llvm-dis a.out.0.0.preopt.bc

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @foo() {
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/thin-archivecollision.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
; CHECK: Name: foo
; CHECK: Name: blah

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

define i32 @_start() #0 {
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/thinlto-thin-archive-collision.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
; CHECK: thinlto-archives/thin.a(thin.o at {{[1-9][0-9]+}})
; CHECK-NEXT: -r=thinlto-archives/thin.a(thin.o at {{[1-9][0-9]+}}),blah,p

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare i32 @blah(i32 %meh)
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/thinlto.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
; NM1: T f
; NM2: T g

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare void @g(...)
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: wasm-ld %t.o -o %t.wasm --allow-undefined
; RUN: obj2yaml %t.wasm | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

declare i32 @bar()
Expand Down
1 change: 0 additions & 1 deletion lld/test/wasm/lto/used.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

; Verify that symbols references from regular objects are preserved by LTO

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare void @bar()
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/verify-invalid.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: wasm-ld %t.o -o %t2 --lto-debug-pass-manager \
; RUN: -disable-verify 2>&1 | FileCheck -check-prefix=DISABLE-NPM %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

define void @_start() {
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/weak-undefined.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
; We had a bug where stub function generation was failing because functions
; that are in bitcode (pre-LTO) don't have signatures assigned.

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown"

declare extern_weak i32 @foo()
Expand Down
2 changes: 1 addition & 1 deletion lld/test/wasm/lto/weak.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; RUN: wasm-ld %t.o %t.o -o %t.wasm -r
; RUN: llvm-readobj --symbols %t.wasm | FileCheck %s

target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128"
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
target triple = "wasm32-unknown-unknown-wasm"

define weak void @f() {
Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ Changes to the X86 Backend

* Support ISA of `AVX10.2-256` and `AVX10.2-512`.

* Support instructions of `MOVRS AND AVX10.2`.

Changes to the OCaml bindings
-----------------------------

Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")

TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
Expand Down Expand Up @@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")

TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")

TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")

Expand Down Expand Up @@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")

TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")

Expand Down Expand Up @@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGV

TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
Expand Down Expand Up @@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")

TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/IR/DataLayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class DataLayout {
/// representation (e.g. may be relocated by a copying garbage collector).
/// Additionally, they may also be non-integral (i.e. containing additional
/// metadata such as bounds information/permissions).
bool IsNonIntegral = false;
bool IsNonIntegral;
bool operator==(const PointerSpec &Other) const;
};

Expand Down
39 changes: 39 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -7572,3 +7572,42 @@ def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231
DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
[IntrNoMem]>;
}

// MOVRS vector-load intrinsics: int_x86_avx10_vmovrs{b,d,q,w}{128,256,512}.
// Each intrinsic takes a single pointer operand and returns an integer vector
// whose element type follows the suffix letter (b = i8, w = i16, d = i32,
// q = i64) and whose total bit width follows the numeric suffix. All of them
// are declared IntrReadMem and are bound to the Clang builtin of the same
// name (__builtin_ia32_vmovrs*).
let TargetPrefix = "x86" in {
// Byte elements (i8).
def int_x86_avx10_vmovrsb128 : ClangBuiltin<"__builtin_ia32_vmovrsb128">,
DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsb256 : ClangBuiltin<"__builtin_ia32_vmovrsb256">,
DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsb512 : ClangBuiltin<"__builtin_ia32_vmovrsb512">,
DefaultAttrsIntrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
// Doubleword elements (i32).
def int_x86_avx10_vmovrsd128 : ClangBuiltin<"__builtin_ia32_vmovrsd128">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsd256 : ClangBuiltin<"__builtin_ia32_vmovrsd256">,
DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsd512 : ClangBuiltin<"__builtin_ia32_vmovrsd512">,
DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
// Quadword elements (i64).
def int_x86_avx10_vmovrsq128 : ClangBuiltin<"__builtin_ia32_vmovrsq128">,
DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsq256 : ClangBuiltin<"__builtin_ia32_vmovrsq256">,
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsq512 : ClangBuiltin<"__builtin_ia32_vmovrsq512">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
// Word elements (i16).
def int_x86_avx10_vmovrsw128 : ClangBuiltin<"__builtin_ia32_vmovrsw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsw256 : ClangBuiltin<"__builtin_ia32_vmovrsw256">,
DefaultAttrsIntrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsw512 : ClangBuiltin<"__builtin_ia32_vmovrsw512">,
DefaultAttrsIntrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
}
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/TargetParser/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256", 0)
X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
// FIXME: Define MOVRS via X86_FEATURE_COMPAT once GCC lands the related patch.
X86_FEATURE (MOVRS, "movrs")
X86_FEATURE (ZU, "zu")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
case TargetOpcode::G_FATAN2:
RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
Expand Down Expand Up @@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
Expand Down Expand Up @@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
Expand Down Expand Up @@ -5141,6 +5145,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FACOS:
case G_FASIN:
case G_FATAN:
case G_FATAN2:
case G_FCOSH:
case G_FSINH:
case G_FTANH:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
Expand Down Expand Up @@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/IR/DataLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {

// Default pointer type specifications.
constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
{0, 64, Align::Constant<8>(), Align::Constant<8>(), 64,
false} // p0:64:64:64:64
// p0:64:64:64:64
{0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false},
};

DataLayout::DataLayout()
Expand Down Expand Up @@ -627,6 +627,8 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
// to be done later since the non-integral property is not part of the data
// layout pointer specification.
for (unsigned AS : NonIntegralAddressSpaces) {
// If there is no special spec for a given AS, getPointerSpec(AS) returns
// the spec for AS0, and we then update that to mark it non-integral.
const PointerSpec &PS = getPointerSpec(AS);
setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
true);
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Linker/IRMover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,11 +595,15 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
if (!SGV)
return nullptr;

// If SGV is from dest, it was already materialized when dest was loaded.
if (SGV->getParent() == &DstM)
return nullptr;

// When linking a global from other modules than source & dest, skip
// materializing it because it would be mapped later when its containing
// module is linked. Linking it now would potentially pull in many types that
// may not be mapped properly.
if (SGV->getParent() != &DstM && SGV->getParent() != SrcM.get())
if (SGV->getParent() != SrcM.get())
return nullptr;

Expected<Constant *> NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol);
Expand Down
29 changes: 16 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,18 +734,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}

for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FACOS, ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH, ISD::FTANH,
ISD::FTAN, ISD::FEXP, ISD::FEXP2,
ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FACOS, ISD::FASIN, ISD::FATAN,
ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
ISD::FTANH, ISD::FTAN, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
Expand Down Expand Up @@ -1190,7 +1191,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// silliness like this:
// clang-format off
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC,
{ISD::SELECT, ISD::SELECT_CC, ISD::FATAN2,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
Expand Down Expand Up @@ -1649,6 +1650,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
Expand Down Expand Up @@ -1904,6 +1906,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({{s64, s128}})
.minScalarOrElt(1, MinFPScalar);

getActionDefinitionsBuilder(
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
G_FSINH, G_FTANH})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,8 @@ def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
def FeatureUseGPR32InInlineAsm
: SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
"Enable use of GPR32 in inline assembly for APX">;
def FeatureMOVRS : SubtargetFeature<"movrs", "HasMOVRS", "true",
"Enable MOVRS", []>;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
Expand Down
28 changes: 28 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX10.td
Original file line number Diff line number Diff line change
Expand Up @@ -1647,3 +1647,31 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}

// MOVRS
// Defines the memory-source form ("m") of one MOVRS vector load for the
// vector type described by `_`. The destination is a register of class _.RC
// and the only input is the memory operand $src. The selection pattern is the
// intrinsic named int_x86_avx10_<OpStr><_.Size> applied to addr:$src, so
// OpStr must match the intrinsic mnemonic (e.g. "vmovrsb").
// NOTE(review): AVX512_maskable is defined outside this hunk; presumably it
// also produces the merge-masked and zero-masked variants -- confirm.
multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpStr, "$src", "$src",
(_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
addr:$src))>, EVEX;
}
}

// Instantiates vmovrs_p for every vector length of `_Vec`:
//   Z    (512-bit) is predicated on HasMOVRS and HasAVX10_2_512;
//   Z128 and Z256  are predicated on HasMOVRS and HasAVX10_2.
// Each instantiation is tagged with the matching EVEX_V* vector-length class.
multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
let Predicates = [HasMOVRS, HasAVX10_2_512] in
defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
let Predicates = [HasMOVRS, HasAVX10_2] in {
defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
}
}

// Concrete MOVRS loads for i8/i16/i32/i64 elements. All four use opcode 0x6f
// in T_MAP5 and the WriteVecLoad scheduling class. The byte and word forms
// use the XD prefix, the dword and qword forms use XS, and REX_W is set on
// the wider member of each pair (W and Q). The EVEX_CD8 element size matches
// the element width of each form.
defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">;
def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">;
def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
def HasMOVRS : Predicate<"Subtarget->hasMOVRS()">;
def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TargetParser/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/TargetParser/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,8 @@ constexpr FeatureBitset ImpliedFeaturesNF = {};
constexpr FeatureBitset ImpliedFeaturesCF = {};
constexpr FeatureBitset ImpliedFeaturesZU = {};

constexpr FeatureBitset ImpliedFeaturesMOVRS = {};

constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
#include "llvm/TargetParser/X86TargetParser.def"
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
ret float %y
}

declare float @llvm.atan2.f32(float, float)
; atan2 is not commutative, so pin both source operands of the translated
; G_FATAN2 (and their order) instead of matching only the first one.
define float @test_atan2_f32(float %x, float %y) {
; CHECK-LABEL: name: test_atan2_f32
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $s0
; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY $s1
; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 [[X]], [[Y]]
  %z = call float @llvm.atan2.f32(float %x, float %y)
  ret float %z
}

declare float @llvm.cosh.f32(float)
define float @test_cosh_f32(float %x) {
; CHECK-LABEL: name: test_cosh_f32
Expand Down
333 changes: 333 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -718,8 +718,9 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FATAN2 (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FCOSH (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/CodeGen/AArch64/f16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,7 @@ declare half @llvm.tan.f16(half %a) #0
declare half @llvm.asin.f16(half %a) #0
declare half @llvm.acos.f16(half %a) #0
declare half @llvm.atan.f16(half %a) #0
declare half @llvm.atan2.f16(half %a, half %b) #0
declare half @llvm.sinh.f16(half %a) #0
declare half @llvm.cosh.f16(half %a) #0
declare half @llvm.tanh.f16(half %a) #0
Expand Down Expand Up @@ -1246,6 +1247,11 @@ define half @test_atan(half %a) #0 {
ret half %r
}

; Half-precision atan2. Without any assertions this test only proved that llc
; does not crash; check that the function is emitted and that the work is done
; by a call to atan2f.
; NOTE(review): regenerate with utils/update_llc_test_checks.py if the full
; instruction sequence should be pinned like the neighbouring tests.
define half @test_atan2(half %a, half %b) #0 {
; CHECK-LABEL: test_atan2:
; CHECK: bl atan2f
  %r = call half @llvm.atan2.f16(half %a, half %b)
  ret half %r
}

define half @test_cosh(half %a) #0 {
; CHECK-LABEL: test_cosh:
; CHECK: // %bb.0:
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,22 @@ define half @atan_f16(half %x) #0 {
ret half %val
}

; Strict-FP (round.tonearest / fpexcept.strict) atan2 on half. The expected
; code converts both operands to single precision, calls atan2f, and converts
; the result back to half.
define half @atan2_f16(half %x, half %y) #0 {
; CHECK-LABEL: atan2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%val = call half @llvm.experimental.constrained.atan2.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret half %val
}

define half @sinh_f16(half %x) #0 {
; CHECK-LABEL: sinh_f16:
; CHECK: // %bb.0:
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/CodeGen/AArch64/fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,13 @@ define float @atan_f32(float %x) #0 {
ret float %val
}

; Constrained f32 atan2 (round.tonearest / fpexcept.strict) is expected to be
; emitted as a call to atan2f.
; CHECK-LABEL: atan2_f32:
; CHECK: bl atan2f
define float @atan2_f32(float %x, float %y) #0 {
%val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret float %val
}

; CHECK-LABEL: sinh_f32:
; CHECK: bl sinhf
define float @sinh_f32(float %x) #0 {
Expand Down Expand Up @@ -707,6 +714,13 @@ define double @atan_f64(double %x) #0 {
ret double %val
}

; Constrained f64 atan2 (round.tonearest / fpexcept.strict) is expected to be
; emitted as a call to atan2.
; CHECK-LABEL: atan2_f64:
; CHECK: bl atan2
define double @atan2_f64(double %x, double %y) #0 {
%val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret double %val
}

; CHECK-LABEL: sinh_f64:
; CHECK: bl sinh
define double @sinh_f64(double %x) #0 {
Expand Down Expand Up @@ -1240,6 +1254,13 @@ define fp128 @atan_f128(fp128 %x) #0 {
ret fp128 %val
}

; Constrained fp128 atan2 (round.tonearest / fpexcept.strict) is expected to be
; emitted as a call to atan2l.
; CHECK-LABEL: atan2_f128:
; CHECK: bl atan2l
define fp128 @atan2_f128(fp128 %x, fp128 %y) #0 {
%val = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret fp128 %val
}

; CHECK-LABEL: sinh_f128:
; CHECK: bl sinhl
define fp128 @sinh_f128(fp128 %x) #0 {
Expand Down Expand Up @@ -1666,6 +1687,13 @@ define <1 x double> @atan_v1f64(<1 x double> %x, <1 x double> %y) #0 {
ret <1 x double> %val
}

; Constrained atan2 on a single-element <1 x double> vector is expected to be
; emitted as a call to the scalar atan2.
; CHECK-LABEL: atan2_v1f64:
; CHECK: bl atan2
define <1 x double> @atan2_v1f64(<1 x double> %x, <1 x double> %y) #0 {
%val = call <1 x double> @llvm.experimental.constrained.atan2.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret <1 x double> %val
}

; CHECK-LABEL: sinh_v1f64:
; CHECK: bl sinh
define <1 x double> @sinh_v1f64(<1 x double> %x, <1 x double> %y) #0 {
Expand Down Expand Up @@ -1755,6 +1783,7 @@ declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)
Expand Down Expand Up @@ -1806,6 +1835,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata
declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata)
Expand Down Expand Up @@ -1857,6 +1887,7 @@ declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.asin.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.acos.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.cosh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata)
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AArch64/illegal-float-ops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,27 @@ define void @test_atan(float %float, double %double, fp128 %fp128) {
ret void
}

; atan2 for float, double and fp128: each intrinsic call is expected to become
; a call to the matching routine (atan2f, atan2, atan2l). The results are
; stored to globals so every call stays live.
declare float @llvm.atan2.f32(float, float)
declare double @llvm.atan2.f64(double, double)
declare fp128 @llvm.atan2.f128(fp128, fp128)

define void @test_atan2(float %float1, double %double1, fp128 %fp1281, float %float2, double %double2, fp128 %fp1282) {
; CHECK-LABEL: test_atan2:

%atan2float = call float @llvm.atan2.f32(float %float1, float %float2)
store float %atan2float, ptr @varfloat
; CHECK: bl atan2f

%atan2double = call double @llvm.atan2.f64(double %double1, double %double2)
store double %atan2double, ptr @vardouble
; CHECK: bl atan2

%atan2fp128 = call fp128 @llvm.atan2.f128(fp128 %fp1281, fp128 %fp1282)
store fp128 %atan2fp128, ptr @varfp128
; CHECK: bl atan2l
ret void
}

declare float @llvm.cosh.f32(float)
declare double @llvm.cosh.f64(double)
declare fp128 @llvm.cosh.f128(fp128)
Expand Down
47 changes: 46 additions & 1 deletion llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)

;.
; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [68 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vatan2q_f64, ptr @armpl_vatan2q_f32, ptr @armpl_svatan2_f64_x, ptr @armpl_svatan2_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"

;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
Expand Down Expand Up @@ -598,6 +598,51 @@ define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) #0 {
ret <vscale x 4 x float> %1
}

declare <2 x double> @llvm.atan2.v2f64(<2 x double>, <2 x double>)
declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
declare <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

; Fixed-width <2 x double> atan2 with fast-math flags: the intrinsic call is
; expected to be replaced by a call to armpl_vatan2q_f64 with the same two
; operands in the same order.
define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
; CHECK-LABEL: define <2 x double> @llvm_atan2_f64
; CHECK-SAME: (<2 x double> [[IN1:%.*]], <2 x double> [[IN2:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %in1, <2 x double> %in2)
ret <2 x double> %1
}

; Fixed-width <4 x float> atan2 with fast-math flags: the intrinsic call is
; expected to be replaced by a call to armpl_vatan2q_f32 with the same two
; operands in the same order.
define <4 x float> @llvm_atan2_f32(<4 x float> %in1, <4 x float> %in2) {
; CHECK-LABEL: define <4 x float> @llvm_atan2_f32
; CHECK-SAME: (<4 x float> [[IN1:%.*]], <4 x float> [[IN2:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %in1, <4 x float> %in2)
ret <4 x float> %1
}

; Scalable <vscale x 2 x double> atan2 with fast-math flags: the intrinsic call
; is expected to be replaced by armpl_svatan2_f64_x, which takes the two
; operands plus a trailing all-true <vscale x 2 x i1> splat.
define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_atan2_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN1:%.*]], <vscale x 2 x double> [[IN2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[IN1]], <vscale x 2 x double> [[IN2]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %in1, <vscale x 2 x double> %in2)
ret <vscale x 2 x double> %1
}

; Scalable <vscale x 4 x float> atan2 with fast-math flags: the intrinsic call
; is expected to be replaced by armpl_svatan2_f32_x, which takes the two
; operands plus a trailing all-true <vscale x 4 x i1> splat.
define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_atan2_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN1:%.*]], <vscale x 4 x float> [[IN2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[IN1]], <vscale x 4 x float> [[IN2]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2)
ret <vscale x 4 x float> %1
}

declare <2 x double> @llvm.cosh.v2f64(<2 x double>)
declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
declare <vscale x 2 x double> @llvm.cosh.nxv2f64(<vscale x 2 x double>)
Expand Down
20 changes: 19 additions & 1 deletion llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"

;.
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxvv_atan2, ptr @_ZGVsMxvv_atan2f, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_vscale_f64(
Expand Down Expand Up @@ -438,6 +438,24 @@ define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) {
ret <vscale x 4 x float> %1
}

; Scalable f64 atan2 with fast-math flags: the intrinsic call is expected to be
; replaced by _ZGVsMxvv_atan2, called with the two operands and a trailing
; all-true <vscale x 2 x i1> splat.
define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y) {
; CHECK-LABEL: @llvm_atan2_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[INX:%.*]], <vscale x 2 x double> [[INY:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y)
ret <vscale x 2 x double> %1
}

; Scalable f32 atan2 with fast-math flags: the intrinsic call is expected to be
; replaced by _ZGVsMxvv_atan2f, called with the two operands and a trailing
; all-true <vscale x 4 x i1> splat.
define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: @llvm_atan2_vscale_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[INX:%.*]], <vscale x 4 x float> [[INY:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y)
ret <vscale x 4 x float> %1
}

define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_cosh_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
Expand Down
20 changes: 19 additions & 1 deletion llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"

;.
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2vv_atan2, ptr @_ZGVnN4vv_atan2f, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
Expand Down Expand Up @@ -438,6 +438,24 @@ define <4 x float> @llvm_atan_f32(<4 x float> %in) {
ret <4 x float> %1
}

; Fixed-width <2 x double> atan2 with fast-math flags: the intrinsic call is
; expected to be replaced by a call to _ZGVnN2vv_atan2.
define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @llvm_atan2_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[INX:%.*]], <2 x double> [[INY:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %1
}

; Fixed-width <4 x float> atan2 with fast-math flags: the intrinsic call is
; expected to be replaced by a call to _ZGVnN4vv_atan2f.
define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @llvm_atan2_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[INX:%.*]], <4 x float> [[INY:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %1
}

define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_cosh_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
Expand Down
35 changes: 35 additions & 0 deletions llvm/test/CodeGen/AArch64/vec-libcalls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ declare <3 x float> @llvm.tan.v3f32(<3 x float>)
declare <3 x float> @llvm.asin.v3f32(<3 x float>)
declare <3 x float> @llvm.acos.v3f32(<3 x float>)
declare <3 x float> @llvm.atan.v3f32(<3 x float>)
declare <3 x float> @llvm.atan2.v3f32(<3 x float>, <3 x float>)
declare <3 x float> @llvm.sinh.v3f32(<3 x float>)
declare <3 x float> @llvm.cosh.v3f32(<3 x float>)
declare <3 x float> @llvm.tanh.v3f32(<3 x float>)
Expand Down Expand Up @@ -428,6 +429,40 @@ define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
ret <3 x float> %r
}

; <3 x float> atan2: the expected code spills both input vectors, makes three
; scalar atan2f calls (one per lane pair), and re-inserts each result into the
; output vector.
define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
; CHECK-LABEL: atan2_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: mov s1, v1.s[1]
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: mov s1, v1.s[2]
; CHECK-NEXT: bl atan2f
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: mov v1.s[2], v0.s[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%r = call <3 x float> @llvm.atan2.v3f32(<3 x float> %x, <3 x float> %y)
ret <3 x float> %r
}

define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sinh_v3f32:
; CHECK: // %bb.0:
Expand Down
163 changes: 163 additions & 0 deletions llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK

declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr)
declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr)
declare <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr)
declare <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr)

; Unmasked 512-bit byte-element load through llvm.x86.avx10.vmovrsb512; the
; result is bitcast to <8 x i64>. A single vmovrsb from memory into zmm0 is
; expected.
define <8 x i64> @test_mm512_movrsb_epi8(ptr %__A) {
; CHECK-LABEL: test_mm512_movrsb_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsb (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__A)
%1 = bitcast <64 x i8> %0 to <8 x i64>
ret <8 x i64> %1
}

; Merge-masked 512-bit byte load: the loaded vector is selected against %__A
; under the 64-bit mask %__B. The select is expected to fold into a single
; vmovrsb with a {%k1} write mask.
define <8 x i64> @test_mm512_mask_movrsb_epi8(<8 x i64> %__A, i64 %__B, ptr %__C) {
; CHECK-LABEL: test_mm512_mask_movrsb_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__C)
%1 = bitcast <8 x i64> %__A to <64 x i8>
%2 = bitcast i64 %__B to <64 x i1>
%3 = select <64 x i1> %2, <64 x i8> %0, <64 x i8> %1
%4 = bitcast <64 x i8> %3 to <8 x i64>
ret <8 x i64> %4
}

; Zero-masked 512-bit byte load: the loaded vector is selected against
; zeroinitializer under the 64-bit mask %__A. The select is expected to fold
; into a single vmovrsb with {%k1} {z}.
define dso_local <8 x i64> @test_mm512_maskz_movrsb_epi8(i64 %__A, ptr %__B) {
; CHECK-LABEL: test_mm512_maskz_movrsb_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__B)
%1 = bitcast i64 %__A to <64 x i1>
%2 = select <64 x i1> %1, <64 x i8> %0, <64 x i8> zeroinitializer
%3 = bitcast <64 x i8> %2 to <8 x i64>
ret <8 x i64> %3
}

; Unmasked 512-bit dword-element load through llvm.x86.avx10.vmovrsd512; the
; result is bitcast to <8 x i64>. A single vmovrsd from memory into zmm0 is
; expected.
define <8 x i64> @test_mm512_movrsd_epi32(ptr %__A) {
; CHECK-LABEL: test_mm512_movrsd_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsd (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7e,0x48,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__A)
%1 = bitcast <16 x i32> %0 to <8 x i64>
ret <8 x i64> %1
}

; Merge-masked 512-bit dword load: the loaded vector is selected against %__A
; under the 16-bit mask %__B. The select is expected to fold into a single
; vmovrsd with a {%k1} write mask.
define <8 x i64> @test_mm512_mask_movrsd_epi32(<8 x i64> %__A, i16 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm512_mask_movrsd_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x49,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__C)
%1 = bitcast <8 x i64> %__A to <16 x i32>
%2 = bitcast i16 %__B to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
%4 = bitcast <16 x i32> %3 to <8 x i64>
ret <8 x i64> %4
}

; Zero-masked 512-bit dword load: the loaded vector is selected against
; zeroinitializer under the 16-bit mask %__A. The select is expected to fold
; into a single vmovrsd with {%k1} {z}.
define <8 x i64> @test_mm512_maskz_movrsd_epi32(i16 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm512_maskz_movrsd_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xc9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__B)
%1 = bitcast i16 %__A to <16 x i1>
%2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer
%3 = bitcast <16 x i32> %2 to <8 x i64>
ret <8 x i64> %3
}

; Unmasked 512-bit qword-element load through llvm.x86.avx10.vmovrsq512. The
; intrinsic already returns <8 x i64>, so no bitcast is needed. A single
; vmovrsq from memory into zmm0 is expected.
define <8 x i64> @test_mm512_movrsq_epi64(ptr %__A) {
; CHECK-LABEL: test_mm512_movrsq_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsq (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfe,0x48,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__A)
ret <8 x i64> %0
}

; Merge-masked 512-bit qword load: the loaded vector is selected against %__A
; under the 8-bit mask %__B. The select is expected to fold into a single
; vmovrsq with a {%k1} write mask.
define <8 x i64> @test_mm512_mask_movrsq_epi64(<8 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm512_mask_movrsq_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x49,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__C)
%1 = bitcast i8 %__B to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
ret <8 x i64> %2
}

; Zero-masked 512-bit qword load: the loaded vector is selected against
; zeroinitializer under the 8-bit mask %__A. The select is expected to fold
; into a single vmovrsq with {%k1} {z}.
define <8 x i64> @test_mm512_maskz_movrsq_epi64(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm512_maskz_movrsq_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xc9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
ret <8 x i64> %2
}

; Unmasked 512-bit word-element load through llvm.x86.avx10.vmovrsw512; the
; result is bitcast to <8 x i64>. A single vmovrsw from memory into zmm0 is
; expected.
define <8 x i64> @test_mm512_movrsw_epi16(ptr %__A) {
; CHECK-LABEL: test_mm512_movrsw_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsw (%rdi), %zmm0 # encoding: [0x62,0xf5,0xff,0x48,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__A)
%1 = bitcast <32 x i16> %0 to <8 x i64>
ret <8 x i64> %1
}

; Merge-masked 512-bit word load: the loaded vector is selected against %__A
; under the 32-bit mask %__B. The select is expected to fold into a single
; vmovrsw with a {%k1} write mask.
define <8 x i64> @test_mm512_mask_movrsw_epi16(<8 x i64> %__A, i32 %__B, ptr %__C) {
; CHECK-LABEL: test_mm512_mask_movrsw_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x49,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__C)
%1 = bitcast <8 x i64> %__A to <32 x i16>
%2 = bitcast i32 %__B to <32 x i1>
%3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1
%4 = bitcast <32 x i16> %3 to <8 x i64>
ret <8 x i64> %4
}

; Zero-masked 512-bit word load: the loaded vector is selected against
; zeroinitializer under the 32-bit mask %__A. The select is expected to fold
; into a single vmovrsw with {%k1} {z}.
define <8 x i64> @test_mm512_maskz_movrsw_epi16(i32 %__A, ptr %__B) {
; CHECK-LABEL: test_mm512_maskz_movrsw_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xc9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__B)
%1 = bitcast i32 %__A to <32 x i1>
%2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
%3 = bitcast <32 x i16> %2 to <8 x i64>
ret <8 x i64> %3
}
329 changes: 329 additions & 0 deletions llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK

; Unmasked 128-bit byte form: calls @llvm.x86.avx10.vmovrsb128 on %__A and
; bitcasts the <16 x i8> result to <2 x i64>. The assertions expect a single
; vmovrsb with an xmm destination.
define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) {
; CHECK-LABEL: test_mm_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsb (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__A)
%1 = bitcast <16 x i8> %0 to <2 x i64>
ret <2 x i64> %1
}

; Merge-masked 128-bit byte form: the i16 %__B is bitcast to <16 x i1> and
; selects, per lane, between the intrinsic result and the passthrough %__A
; (viewed as <16 x i8>). The assertions expect a kmovd into %k1 followed by a
; vmovrsb carrying the {%k1} write-mask.
define <2 x i64> @test_mm_mask_movrsb_epu8(<2 x i64> %__A, i16 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm_mask_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__C)
%1 = bitcast <2 x i64> %__A to <16 x i8>
%2 = bitcast i16 %__B to <16 x i1>
%3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1
%4 = bitcast <16 x i8> %3 to <2 x i64>
ret <2 x i64> %4
}

; Zero-masked 128-bit byte form: the i16 %__A is bitcast to <16 x i1> and
; selects between the intrinsic result and zeroinitializer. The assertions
; expect a kmovd into %k1 followed by a vmovrsb with {%k1} {z}.
define <2 x i64> @test_mm_maskz_movrsb_epu8(i16 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm_maskz_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__B )
%1 = bitcast i16 %__A to <16 x i1>
%2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}

; Unmasked 256-bit byte form: calls @llvm.x86.avx10.vmovrsb256 on %__A and
; bitcasts the <32 x i8> result to <4 x i64>. The assertions expect a single
; vmovrsb with a ymm destination.
define <4 x i64> @test_mm256_movrsb_epu8(ptr %__A) {
; CHECK-LABEL: test_mm256_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsb (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__A)
%1 = bitcast <32 x i8> %0 to <4 x i64>
ret <4 x i64> %1
}

; Merge-masked 256-bit byte form: the i32 %__B is bitcast to <32 x i1> and
; selects, per lane, between the intrinsic result and the passthrough %__A
; (viewed as <32 x i8>). The assertions expect a kmovd into %k1 followed by a
; vmovrsb carrying the {%k1} write-mask.
define <4 x i64> @test_mm256_mask_movrsb_epu8(<4 x i64> %__A, i32 %__B, ptr %__C) {
; CHECK-LABEL: test_mm256_mask_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__C)
%1 = bitcast <4 x i64> %__A to <32 x i8>
%2 = bitcast i32 %__B to <32 x i1>
%3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1
%4 = bitcast <32 x i8> %3 to <4 x i64>
ret <4 x i64> %4
}

; Zero-masked 256-bit byte form: the i32 %__A is bitcast to <32 x i1> and
; selects between the intrinsic result and zeroinitializer. The assertions
; expect a kmovd into %k1 followed by a vmovrsb with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_movrsb_epu8(i32 %__A, ptr %__B) {
; CHECK-LABEL: test_mm256_maskz_movrsb_epu8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__B)
%1 = bitcast i32 %__A to <32 x i1>
%2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer
%3 = bitcast <32 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}

; Unmasked 128-bit dword form: calls @llvm.x86.avx10.vmovrsd128 on %__A and
; bitcasts the <4 x i32> result to <2 x i64>. The assertions expect a single
; vmovrsd with an xmm destination.
define <2 x i64> @test_mm_movrsd_epu32(ptr %__A) {
; CHECK-LABEL: test_mm_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsd (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__A)
%1 = bitcast <4 x i32> %0 to <2 x i64>
ret <2 x i64> %1
}

; Merge-masked 128-bit dword form: the i8 %__B is bitcast to <8 x i1>, and a
; shufflevector keeps lanes 0-3 to form the <4 x i1> condition that selects
; between the intrinsic result and the passthrough %__A (viewed as <4 x i32>).
; The assertions expect a kmovd into %k1 followed by a vmovrsd with {%k1}.
define <2 x i64> @test_mm_mask_movrsd_epu32(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm_mask_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__C)
%1 = bitcast <2 x i64> %__A to <4 x i32>
%2 = bitcast i8 %__B to <8 x i1>
%extract.i = shufflevector <8 x i1> %2, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1
%4 = bitcast <4 x i32> %3 to <2 x i64>
ret <2 x i64> %4
}

; Zero-masked 128-bit dword form: the i8 %__A is bitcast to <8 x i1>, lanes 0-3
; are extracted with a shufflevector, and the resulting <4 x i1> selects
; between the intrinsic result and zeroinitializer. The assertions expect a
; kmovd into %k1 followed by a vmovrsd with {%k1} {z}.
define <2 x i64> @test_mm_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm_maskz_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0x89,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}

; Unmasked 256-bit dword form: calls @llvm.x86.avx10.vmovrsd256 on %__A and
; bitcasts the <8 x i32> result to <4 x i64>. The assertions expect a single
; vmovrsd with a ymm destination.
define <4 x i64> @test_mm256_movrsd_epu32(ptr %__A) {
; CHECK-LABEL: test_mm256_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsd (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__A)
%1 = bitcast <8 x i32> %0 to <4 x i64>
ret <4 x i64> %1
}

; Merge-masked 256-bit dword form: the i8 %__B is bitcast to <8 x i1> and used
; directly (all eight lanes) to select between the intrinsic result and the
; passthrough %__A (viewed as <8 x i32>). The assertions expect a kmovd into
; %k1 followed by a vmovrsd with {%k1}.
define <4 x i64> @test_mm256_mask_movrsd_epu32(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm256_mask_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x29,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__C)
%1 = bitcast <4 x i64> %__A to <8 x i32>
%2 = bitcast i8 %__B to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1
%4 = bitcast <8 x i32> %3 to <4 x i64>
ret <4 x i64> %4
}

; Zero-masked 256-bit dword form: the i8 %__A is bitcast to <8 x i1> and
; selects between the intrinsic result and zeroinitializer. The assertions
; expect a kmovd into %k1 followed by a vmovrsd with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm256_maskz_movrsd_epu32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xa9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer
%3 = bitcast <8 x i32> %2 to <4 x i64>
ret <4 x i64> %3
}

; Unmasked 128-bit qword form: the <2 x i64> result of
; @llvm.x86.avx10.vmovrsq128 is returned directly, with no bitcast. The
; assertions expect a single vmovrsq with an xmm destination.
define <2 x i64> @test_mm_movrsq_epu64(ptr %__A) {
; CHECK-LABEL: test_mm_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsq (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__A)
ret <2 x i64> %0
}

; Merge-masked 128-bit qword form: the i8 %__B is bitcast to <8 x i1>, and a
; shufflevector keeps lanes 0-1 to form the <2 x i1> condition that selects
; between the intrinsic result and the passthrough %__A. The assertions expect
; a kmovd into %k1 followed by a vmovrsq with {%k1}.
define <2 x i64> @test_mm_mask_movrsq_epu64(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm_mask_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x09,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__C)
%1 = bitcast i8 %__B to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__A
ret <2 x i64> %2
}

; Zero-masked 128-bit qword form: the i8 %__A is bitcast to <8 x i1>, lanes 0-1
; are extracted with a shufflevector, and the resulting <2 x i1> selects
; between the intrinsic result and zeroinitializer. The assertions expect a
; kmovd into %k1 followed by a vmovrsq with {%k1} {z}.
define <2 x i64> @test_mm_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm_maskz_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0x89,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
ret <2 x i64> %2
}

; Unmasked 256-bit qword form: the <4 x i64> result of
; @llvm.x86.avx10.vmovrsq256 is returned directly, with no bitcast. The
; assertions expect a single vmovrsq with a ymm destination.
define <4 x i64> @test_mm256_movrsq_epu64(ptr %__A) {
; CHECK-LABEL: test_mm256_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsq (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__A)
ret <4 x i64> %0
}

; Merge-masked 256-bit qword form: the i8 %__B is bitcast to <8 x i1>, and a
; shufflevector keeps lanes 0-3 to form the <4 x i1> condition that selects
; between the intrinsic result and the passthrough %__A. The assertions expect
; a kmovd into %k1 followed by a vmovrsq with {%k1}.
define <4 x i64> @test_mm256_mask_movrsq_epu64(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm256_mask_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x29,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__C)
%1 = bitcast i8 %__B to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__A
ret <4 x i64> %2
}

; Zero-masked 256-bit qword form: the i8 %__A is bitcast to <8 x i1>, lanes 0-3
; are extracted with a shufflevector, and the resulting <4 x i1> selects
; between the intrinsic result and zeroinitializer. The assertions expect a
; kmovd into %k1 followed by a vmovrsq with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm256_maskz_movrsq_epu64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xa9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
ret <4 x i64> %2
}

; Unmasked 128-bit word form: calls @llvm.x86.avx10.vmovrsw128 on %__A and
; bitcasts the <8 x i16> result to <2 x i64>. The assertions expect a single
; vmovrsw with an xmm destination.
define <2 x i64> @test_mm_movrsw_epu16(ptr %__A) {
; CHECK-LABEL: test_mm_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsw (%rdi), %xmm0 # encoding: [0x62,0xf5,0xff,0x08,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__A)
%1 = bitcast <8 x i16> %0 to <2 x i64>
ret <2 x i64> %1
}

; Merge-masked 128-bit word form: the i8 %__B is bitcast to <8 x i1> and
; selects, per lane, between the intrinsic result and the passthrough %__A
; (viewed as <8 x i16>). The assertions expect a kmovd into %k1 followed by a
; vmovrsw with {%k1}.
define <2 x i64> @test_mm_mask_movrsw_epu16(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm_mask_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x09,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__C)
%1 = bitcast <2 x i64> %__A to <8 x i16>
%2 = bitcast i8 %__B to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
%4 = bitcast <8 x i16> %3 to <2 x i64>
ret <2 x i64> %4
}

; Zero-masked 128-bit word form: the i8 %__A is bitcast to <8 x i1> and selects
; between the intrinsic result and zeroinitializer. The assertions expect a
; kmovd into %k1 followed by a vmovrsw with {%k1} {z}.
define <2 x i64> @test_mm_maskz_movrsw_epu16(i8 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm_maskz_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0x89,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__B)
%1 = bitcast i8 %__A to <8 x i1>
%2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
%3 = bitcast <8 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}

; Unmasked 256-bit word form: calls @llvm.x86.avx10.vmovrsw256 on %__A and
; bitcasts the <16 x i16> result to <4 x i64>. The assertions expect a single
; vmovrsw with a ymm destination.
define <4 x i64> @test_mm256_movrsw_epu16(ptr %__A) {
; CHECK-LABEL: test_mm256_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovrsw (%rdi), %ymm0 # encoding: [0x62,0xf5,0xff,0x28,0x6f,0x07]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__A)
%1 = bitcast <16 x i16> %0 to <4 x i64>
ret <4 x i64> %1
}

; Merge-masked 256-bit word form: the i16 %__B is bitcast to <16 x i1> and
; selects, per lane, between the intrinsic result and the passthrough %__A
; (viewed as <16 x i16>). The assertions expect a kmovd into %k1 followed by a
; vmovrsw with {%k1}.
define <4 x i64> @test_mm256_mask_movrsw_epu16(<4 x i64> %__A, i16 zeroext %__B, ptr %__C) {
; CHECK-LABEL: test_mm256_mask_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xff,0x29,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__C)
%1 = bitcast <4 x i64> %__A to <16 x i16>
%2 = bitcast i16 %__B to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
%4 = bitcast <16 x i16> %3 to <4 x i64>
ret <4 x i64> %4
}

; Zero-masked 256-bit word form: the i16 %__A is bitcast to <16 x i1> and
; selects between the intrinsic result and zeroinitializer. The assertions
; expect a kmovd into %k1 followed by a vmovrsw with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_movrsw_epu16(i16 zeroext %__A, ptr %__B) {
; CHECK-LABEL: test_mm256_maskz_movrsw_epu16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xa9,0x6f,0x06]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__B)
%1 = bitcast i16 %__A to <16 x i1>
%2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
%3 = bitcast <16 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}

; Declarations of the 128- and 256-bit vmovrs intrinsics called by the tests in
; this file. Each takes a single ptr argument and returns a vector whose
; element type follows the b/d/q/w suffix (i8/i32/i64/i16).
declare <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr)
declare <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr)
declare <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr)
declare <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr)
declare <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr)
declare <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr)
declare <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr)
declare <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr)
Loading