-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[CIR][X86] Implement lowering for pmuldq / pmuludq builtins #169853
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -115,6 +115,47 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder, | |||||||||||
| ops[0].getType()); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| static mlir::Value emitX86Muldq(CIRGenFunction &cgf, const CallExpr *expr, | ||||||||||||
| bool isSigned, | ||||||||||||
| SmallVectorImpl<mlir::Value> &ops) { | ||||||||||||
| CIRGenBuilderTy &builder = cgf.getBuilder(); | ||||||||||||
| mlir::Location loc = cgf.getLoc(expr->getExprLoc()); | ||||||||||||
| mlir::Type ty = ops[0].getType(); | ||||||||||||
| unsigned tyPrimitiveSizeInBits = | ||||||||||||
| cgf.cgm.getDataLayout().getTypeSizeInBits(ty); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| mlir::Value lhs, rhs; | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| // in cir, if a shiftOperation is shift right,it will be translated into Ashr | ||||||||||||
| // or lShr automatically in match and rewrite stage according to its operand's | ||||||||||||
| // type | ||||||||||||
| if (isSigned) { | ||||||||||||
| ty = | ||||||||||||
| cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line appears in both clauses of the |
||||||||||||
| cir::ConstantOp shiftAmt = | ||||||||||||
| builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32)); | ||||||||||||
| cir::VecSplatOp shiftSplatVecOp = | ||||||||||||
| cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult()); | ||||||||||||
| mlir::Value shiftSplatValue = shiftSplatVecOp.getResult(); | ||||||||||||
| lhs = builder.createBitcast(loc, ops[0], ty); | ||||||||||||
| rhs = builder.createBitcast(loc, ops[1], ty); | ||||||||||||
|
Comment on lines
+138
to
+139
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These also should be hoisted out of the |
||||||||||||
| lhs = builder.createShift(loc, lhs, shiftSplatValue, true); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd add a comment here explaining the purpose of these shifts. The comment above about arithmetic shift being used automatically really belongs here, but the more relevant information is that the is filling the upper bits with of the vector with the sign-bit of the 32-bit value in each lane. |
||||||||||||
| lhs = builder.createShift(loc, lhs, shiftSplatValue, false); | ||||||||||||
| rhs = builder.createShift(loc, rhs, shiftSplatValue, true); | ||||||||||||
| rhs = builder.createShift(loc, rhs, shiftSplatValue, false); | ||||||||||||
| } else { | ||||||||||||
| ty = | ||||||||||||
| cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64); | ||||||||||||
| cir::ConstantOp maskScalar = builder.getConstant( | ||||||||||||
| loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff)); | ||||||||||||
| cir::VecSplatOp mask = | ||||||||||||
| cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult()); | ||||||||||||
| lhs = builder.createBitcast(loc, ops[0], ty); | ||||||||||||
| rhs = builder.createBitcast(loc, ops[1], ty); | ||||||||||||
| lhs = builder.createAnd(loc, lhs, mask); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please keep the comment "// Clear the upper bits". It may seem obvious, but it's a useful hint as to the logic of this function. |
||||||||||||
| rhs = builder.createAnd(loc, rhs, mask); | ||||||||||||
| } | ||||||||||||
| return builder.createMul(loc, lhs, rhs); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, | ||||||||||||
| const CallExpr *expr) { | ||||||||||||
| if (builtinID == Builtin::BI__builtin_cpu_is) { | ||||||||||||
|
|
@@ -851,12 +892,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, | |||||||||||
| case X86::BI__builtin_ia32_sqrtph512: | ||||||||||||
| case X86::BI__builtin_ia32_sqrtps512: | ||||||||||||
| case X86::BI__builtin_ia32_sqrtpd512: | ||||||||||||
| cgm.errorNYI(expr->getSourceRange(), | ||||||||||||
| std::string("unimplemented X86 builtin call: ") + | ||||||||||||
| getContext().BuiltinInfo.getName(builtinID)); | ||||||||||||
| return {}; | ||||||||||||
| case X86::BI__builtin_ia32_pmuludq128: | ||||||||||||
| case X86::BI__builtin_ia32_pmuludq256: | ||||||||||||
| case X86::BI__builtin_ia32_pmuludq512: | ||||||||||||
| return emitX86Muldq(*this, expr, /*IsSigned*/ false, ops); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
This suggestion reflects the change in signature suggested above, but it also aligns the |
||||||||||||
| case X86::BI__builtin_ia32_pmuldq128: | ||||||||||||
| case X86::BI__builtin_ia32_pmuldq256: | ||||||||||||
| case X86::BI__builtin_ia32_pmuldq512: | ||||||||||||
| return emitX86Muldq(*this, expr, /*IsSigned*/ true, ops); | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| case X86::BI__builtin_ia32_pternlogd512_mask: | ||||||||||||
| case X86::BI__builtin_ia32_pternlogq512_mask: | ||||||||||||
| case X86::BI__builtin_ia32_pternlogd128_mask: | ||||||||||||
|
|
||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s | ||
|
|
||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s | ||
|
|
||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
|
|
||
| #include <immintrin.h> | ||
|
|
||
| __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { | ||
| // CIR-LABEL: _mm256_mul_epu32 | ||
| // CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i | ||
| // CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<4 x !s64i> | ||
| // CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i> | ||
| // CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i> | ||
| // CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]]) | ||
| // CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]]) | ||
| // CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]]) | ||
|
|
||
| // LLVM-LABEL: _mm256_mul_epu32 | ||
| // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295) | ||
| // LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295) | ||
| // LLVM: mul <4 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| // OGCG-LABEL: _mm256_mul_epu32 | ||
| // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295) | ||
| // OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295) | ||
| // OGCG: mul <4 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| return _mm256_mul_epu32(a, b); | ||
| } | ||
|
|
||
| __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { | ||
| // CIR-LABEL: _mm256_mul_epi32 | ||
| // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i | ||
| // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i> | ||
| // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i> | ||
| // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i> | ||
| // CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>) | ||
| // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>) | ||
| // CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>) | ||
| // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>) | ||
| // CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]]) | ||
|
|
||
| // LLVM-LABEL: _mm256_mul_epi32 | ||
| // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: shl <4 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: mul <4 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| // OGCG-LABEL: _mm256_mul_epi32 | ||
| // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: shl <4 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: mul <4 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| return _mm256_mul_epi32(a, b); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s | ||
|
|
||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion | ||
| // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s | ||
|
|
||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
| // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG | ||
|
|
||
| #include <immintrin.h> | ||
|
|
||
| __m128i test_mm_mul_epi32(__m128i x, __m128i y) { | ||
| // CIR-LABEL: _mm_mul_epi32 | ||
| // CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i | ||
| // CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i> | ||
| // CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i> | ||
| // CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i> | ||
| // CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>) | ||
| // CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>) | ||
| // CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>) | ||
| // CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>) | ||
| // CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]]) | ||
|
|
||
| // LLVM-LABEL: _mm_mul_epi32 | ||
| // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: shl <2 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32) | ||
| // LLVM: mul <2 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| // OGCG-LABEL: _mm_mul_epi32 | ||
| // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: shl <2 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32) | ||
| // OGCG: mul <2 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
| return _mm_mul_epi32(x, y); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.