Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions clang/include/clang/Basic/BuiltinsLoongArchLASX.def
Original file line number Diff line number Diff line change
Expand Up @@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")

// Builtins that move data between 128-bit and 256-bit vectors: cast, concat,
// extract (lo/hi) and insert (lo/hi). Each comes in three flavours: float
// (_s), double (_d) and 64-bit integer (no suffix).
// The second argument is the signature in the Builtins.def type encoding,
// return type first: e.g. "V8fV4f" is a vector of 8 floats returned from a
// vector of 4 floats. All entries carry the "nc" attributes (nothrow, const
// in that encoding) and require the "lasx" target feature.
TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_cast_128, "V4LLiV2LLi", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_concat_128, "V4LLiV2LLiV2LLi", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V2LLiV4LLi", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V2LLiV4LLi", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V4LLiV4LLiV2LLi", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V4LLiV4LLiV2LLi", "nc", "lasx")
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__loongarch_simd_width", "256");
Builder.defineMacro("__loongarch_sx", Twine(1));
Builder.defineMacro("__loongarch_asx", Twine(1));
Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
} else if (HasFeatureLSX) {
Builder.defineMacro("__loongarch_simd_width", "128");
Builder.defineMacro("__loongarch_sx", Twine(1));
Expand Down
113 changes: 113 additions & 0 deletions clang/lib/Headers/lasxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#ifndef _LOONGSON_ASXINTRIN_H
#define _LOONGSON_ASXINTRIN_H 1

#include <lsxintrin.h>

#if defined(__loongarch_asx)

typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
Expand Down Expand Up @@ -3882,5 +3884,116 @@ extern __inline

/* Expands to __builtin_lasx_xvrepli_w applied to _1, with the result cast to
 * __m256i. NOTE(review): the si10 annotation presumably marks _1 as a signed
 * 10-bit immediate -- confirm against the instruction reference. */
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))

#if defined(__loongarch_asx_sx_conv)

/* Passes the __m128 argument, cast to v4f32, to __builtin_lasx_cast_128_s and
 * returns the builtin's result cast to __m256.
 * NOTE(review): what the upper 128 bits of the result hold is not visible
 * here -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
    __lasx_cast_128_s(__m128 _1) {
  v4f32 __narrow = (v4f32)_1;
  return (__m256)__builtin_lasx_cast_128_s(__narrow);
}

/* Passes the __m128d argument, cast to v2f64, to __builtin_lasx_cast_128_d and
 * returns the builtin's result cast to __m256d.
 * NOTE(review): what the upper 128 bits of the result hold is not visible
 * here -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
    __lasx_cast_128_d(__m128d _1) {
  v2f64 __narrow = (v2f64)_1;
  return (__m256d)__builtin_lasx_cast_128_d(__narrow);
}

/* Passes the __m128i argument, cast to v2i64, to __builtin_lasx_cast_128 and
 * returns the builtin's result cast to __m256i.
 * NOTE(review): what the upper 128 bits of the result hold is not visible
 * here -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
    __lasx_cast_128(__m128i _1) {
  v2i64 __narrow = (v2i64)_1;
  return (__m256i)__builtin_lasx_cast_128(__narrow);
}

/* Passes both __m128 arguments, cast to v4f32 and in the order given, to
 * __builtin_lasx_concat_128_s and returns the result cast to __m256.
 * NOTE(review): presumably _1 supplies the low half and _2 the high half --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
    __lasx_concat_128_s(__m128 _1, __m128 _2) {
  v4f32 __first = (v4f32)_1;
  v4f32 __second = (v4f32)_2;
  return (__m256)__builtin_lasx_concat_128_s(__first, __second);
}

/* Passes both __m128d arguments, cast to v2f64 and in the order given, to
 * __builtin_lasx_concat_128_d and returns the result cast to __m256d.
 * NOTE(review): presumably _1 supplies the low half and _2 the high half --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
    __lasx_concat_128_d(__m128d _1, __m128d _2) {
  v2f64 __first = (v2f64)_1;
  v2f64 __second = (v2f64)_2;
  return (__m256d)__builtin_lasx_concat_128_d(__first, __second);
}

/* Passes both __m128i arguments, cast to v2i64 and in the order given, to
 * __builtin_lasx_concat_128 and returns the result cast to __m256i.
 * NOTE(review): presumably _1 supplies the low half and _2 the high half --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
    __lasx_concat_128(__m128i _1, __m128i _2) {
  v2i64 __first = (v2i64)_1;
  v2i64 __second = (v2i64)_2;
  return (__m256i)__builtin_lasx_concat_128(__first, __second);
}

/* Passes the __m256 argument, cast to v8f32, to
 * __builtin_lasx_extract_128_lo_s and returns the result cast to __m128.
 * NOTE(review): by its name this presumably yields the low 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
    __lasx_extract_128_lo_s(__m256 _1) {
  v8f32 __wide = (v8f32)_1;
  return (__m128)__builtin_lasx_extract_128_lo_s(__wide);
}

/* Passes the __m256d argument, cast to v4f64, to
 * __builtin_lasx_extract_128_lo_d and returns the result cast to __m128d.
 * NOTE(review): by its name this presumably yields the low 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
    __lasx_extract_128_lo_d(__m256d _1) {
  v4f64 __wide = (v4f64)_1;
  return (__m128d)__builtin_lasx_extract_128_lo_d(__wide);
}

/* Passes the __m256i argument, cast to v4i64, to
 * __builtin_lasx_extract_128_lo and returns the result cast to __m128i.
 * NOTE(review): by its name this presumably yields the low 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
    __lasx_extract_128_lo(__m256i _1) {
  v4i64 __wide = (v4i64)_1;
  return (__m128i)__builtin_lasx_extract_128_lo(__wide);
}

/* Passes the __m256 argument, cast to v8f32, to
 * __builtin_lasx_extract_128_hi_s and returns the result cast to __m128.
 * NOTE(review): by its name this presumably yields the high 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
    __lasx_extract_128_hi_s(__m256 _1) {
  v8f32 __wide = (v8f32)_1;
  return (__m128)__builtin_lasx_extract_128_hi_s(__wide);
}

/* Passes the __m256d argument, cast to v4f64, to
 * __builtin_lasx_extract_128_hi_d and returns the result cast to __m128d.
 * NOTE(review): by its name this presumably yields the high 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
    __lasx_extract_128_hi_d(__m256d _1) {
  v4f64 __wide = (v4f64)_1;
  return (__m128d)__builtin_lasx_extract_128_hi_d(__wide);
}

/* Passes the __m256i argument, cast to v4i64, to
 * __builtin_lasx_extract_128_hi and returns the result cast to __m128i.
 * NOTE(review): by its name this presumably yields the high 128 bits of _1 --
 * confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
    __lasx_extract_128_hi(__m256i _1) {
  v4i64 __wide = (v4i64)_1;
  return (__m128i)__builtin_lasx_extract_128_hi(__wide);
}

/* Passes _1 (cast to v8f32) and _2 (cast to v4f32) to
 * __builtin_lasx_insert_128_lo_s and returns the result cast to __m256.
 * NOTE(review): by its name this presumably replaces the low 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
    __lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
  v8f32 __wide = (v8f32)_1;
  v4f32 __narrow = (v4f32)_2;
  return (__m256)__builtin_lasx_insert_128_lo_s(__wide, __narrow);
}

/* Passes _1 (cast to v4f64) and _2 (cast to v2f64) to
 * __builtin_lasx_insert_128_lo_d and returns the result cast to __m256d.
 * NOTE(review): by its name this presumably replaces the low 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
    __lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
  v4f64 __wide = (v4f64)_1;
  v2f64 __narrow = (v2f64)_2;
  return (__m256d)__builtin_lasx_insert_128_lo_d(__wide, __narrow);
}

/* Passes _1 (cast to v4i64) and _2 (cast to v2i64) to
 * __builtin_lasx_insert_128_lo and returns the result cast to __m256i.
 * NOTE(review): by its name this presumably replaces the low 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
    __lasx_insert_128_lo(__m256i _1, __m128i _2) {
  v4i64 __wide = (v4i64)_1;
  v2i64 __narrow = (v2i64)_2;
  return (__m256i)__builtin_lasx_insert_128_lo(__wide, __narrow);
}

/* Passes _1 (cast to v8f32) and _2 (cast to v4f32) to
 * __builtin_lasx_insert_128_hi_s and returns the result cast to __m256.
 * NOTE(review): by its name this presumably replaces the high 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
    __lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
  v8f32 __wide = (v8f32)_1;
  v4f32 __narrow = (v4f32)_2;
  return (__m256)__builtin_lasx_insert_128_hi_s(__wide, __narrow);
}

/* Passes _1 (cast to v4f64) and _2 (cast to v2f64) to
 * __builtin_lasx_insert_128_hi_d and returns the result cast to __m256d.
 * NOTE(review): by its name this presumably replaces the high 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
    __lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
  v4f64 __wide = (v4f64)_1;
  v2f64 __narrow = (v2f64)_2;
  return (__m256d)__builtin_lasx_insert_128_hi_d(__wide, __narrow);
}

/* Passes _1 (cast to v4i64) and _2 (cast to v2i64) to
 * __builtin_lasx_insert_128_hi and returns the result cast to __m256i.
 * NOTE(review): by its name this presumably replaces the high 128 bits of _1
 * with _2 -- confirm against the builtin's definition. */
extern __inline
    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
    __lasx_insert_128_hi(__m256i _1, __m128i _2) {
  v4i64 __wide = (v4i64)_1;
  v2i64 __narrow = (v2i64)_2;
  return (__m256i)__builtin_lasx_insert_128_hi(__wide, __narrow);
}

#endif /* defined(__loongarch_asx_sx_conv). */
#endif /* defined(__loongarch_asx). */
#endif /* _LOONGSON_ASXINTRIN_H. */
171 changes: 171 additions & 0 deletions clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
Original file line number Diff line number Diff line change
Expand Up @@ -7120,6 +7120,177 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
// CHECK-NEXT: ret void
//
// Test wrapper: returns the result of __lasx_xvrepli_w(1) as v8i32.
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
// CHECK-LABEL: define dso_local void @cast_128_s(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v4f32 to __lasx_cast_128_s and returns the v8f32
// result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.cast.128.s.
v8f32 cast_128_s(v4f32 _1) { return __lasx_cast_128_s(_1); }
// CHECK-LABEL: define dso_local void @cast_128_d(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v2f64 to __lasx_cast_128_d and returns the v4f64
// result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.cast.128.d.
v4f64 cast_128_d(v2f64 _1) { return __lasx_cast_128_d(_1); }
// CHECK-LABEL: define dso_local void @cast_128(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> [[TMP0]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v2i64 to __lasx_cast_128 and returns the v4i64
// result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.cast.128.
v4i64 cast_128(v2i64 _1) { return __lasx_cast_128(_1); }
// CHECK-LABEL: define dso_local void @concat_128_s(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds two v4f32 values to __lasx_concat_128_s and returns the
// v8f32 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.concat.128.s.
v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __lasx_concat_128_s(_1, _2); }
// CHECK-LABEL: define dso_local void @concat_128_d(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds two v2f64 values to __lasx_concat_128_d and returns the
// v4f64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.concat.128.d.
v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __lasx_concat_128_d(_1, _2); }
// CHECK-LABEL: define dso_local void @concat_128(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds two v2i64 values to __lasx_concat_128 and returns the
// v4i64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.concat.128.
v4i64 concat_128(v2i64 _1, v2i64 _2) { return __lasx_concat_128(_1, _2); }
// CHECK-LABEL: define dso_local i128 @extract_128_lo_s(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v8f32 to __lasx_extract_128_lo_s and returns the
// v4f32 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.lo.s.
v4f32 extract_128_lo_s(v8f32 _1) { return __lasx_extract_128_lo_s(_1); }
// CHECK-LABEL: define dso_local i128 @extract_128_lo_d(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v4f64 to __lasx_extract_128_lo_d and returns the
// v2f64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.lo.d.
v2f64 extract_128_lo_d(v4f64 _1) { return __lasx_extract_128_lo_d(_1); }
// CHECK-LABEL: define dso_local i128 @extract_128_lo(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v4i64 to __lasx_extract_128_lo and returns the v2i64
// result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.lo.
v2i64 extract_128_lo(v4i64 _1) { return __lasx_extract_128_lo(_1); }
// CHECK-LABEL: define dso_local i128 @extract_128_hi_s(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v8f32 to __lasx_extract_128_hi_s and returns the
// v4f32 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.hi.s.
v4f32 extract_128_hi_s(v8f32 _1) { return __lasx_extract_128_hi_s(_1); }
// CHECK-LABEL: define dso_local i128 @extract_128_hi_d(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v4f64 to __lasx_extract_128_hi_d and returns the
// v2f64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.hi.d.
v2f64 extract_128_hi_d(v4f64 _1) { return __lasx_extract_128_hi_d(_1); }
// CHECK-LABEL: define dso_local i128 @extract_128_hi(
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> [[_1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT: ret i128 [[TMP2]]
//
// Test wrapper: feeds a v4i64 to __lasx_extract_128_hi and returns the v2i64
// result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.extract.128.hi.
v2i64 extract_128_hi(v4i64 _1) { return __lasx_extract_128_hi(_1); }
// CHECK-LABEL: define dso_local void @insert_128_lo_s(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v8f32 and a v4f32 to __lasx_insert_128_lo_s and
// returns the v8f32 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.lo.s.
v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_lo_s(_1, _2); }
// CHECK-LABEL: define dso_local void @insert_128_lo_d(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v4f64 and a v2f64 to __lasx_insert_128_lo_d and
// returns the v4f64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.lo.d.
v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_lo_d(_1, _2); }
// CHECK-LABEL: define dso_local void @insert_128_lo(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v4i64 and a v2i64 to __lasx_insert_128_lo and returns
// the v4i64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.lo.
v4i64 insert_128_lo(v4i64 _1, v2i64 _2) { return __lasx_insert_128_lo(_1, _2); }
// CHECK-LABEL: define dso_local void @insert_128_hi_s(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v8f32 and a v4f32 to __lasx_insert_128_hi_s and
// returns the v8f32 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.hi.s.
v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_hi_s(_1, _2); }
// CHECK-LABEL: define dso_local void @insert_128_hi_d(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v4f64 and a v2f64 to __lasx_insert_128_hi_d and
// returns the v4f64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.hi.d.
v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_hi_d(_1, _2); }
// CHECK-LABEL: define dso_local void @insert_128_hi(
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
// CHECK-NEXT: ret void
//
// Test wrapper: feeds a v4i64 and a v2i64 to __lasx_insert_128_hi and returns
// the v4i64 result. The CHECK lines above expect a call to
// @llvm.loongarch.lasx.insert.128.hi.
v4i64 insert_128_hi(v4i64 _1, v2i64 _2) { return __lasx_insert_128_hi(_1, _2); }
//.
// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
Expand Down
Loading