Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,47 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
ops[0].getType());
}

static mlir::Value emitX86Muldq(CIRGenFunction &cgf, const CallExpr *expr,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
static mlir::Value emitX86Muldq(CIRGenFunction &cgf, const CallExpr *expr,
static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,

bool isSigned,
SmallVectorImpl<mlir::Value> &ops) {
CIRGenBuilderTy &builder = cgf.getBuilder();
mlir::Location loc = cgf.getLoc(expr->getExprLoc());
mlir::Type ty = ops[0].getType();
unsigned tyPrimitiveSizeInBits =
cgf.cgm.getDataLayout().getTypeSizeInBits(ty);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cgf.cgm.getDataLayout().getTypeSizeInBits(ty);
unsigned tyPrimitiveSizeInBits =
cgf.cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
// Arguments have a vXi32 type so cast to vXi64.
mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64);

mlir::Value lhs, rhs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mlir::Value lhs, rhs;
mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);

// in cir, if a shiftOperation is shift right,it will be translated into Ashr
// or lShr automatically in match and rewrite stage according to its operand's
// type
if (isSigned) {
ty =
cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line appears in both clauses of the if-else expression. It can be hoisted out as it is in classic codegen. Please also retain the comment that is used there.

cir::ConstantOp shiftAmt =
builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
cir::VecSplatOp shiftSplatVecOp =
cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
lhs = builder.createBitcast(loc, ops[0], ty);
rhs = builder.createBitcast(loc, ops[1], ty);
Comment on lines +138 to +139
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These also should be hoisted out of the if-else.

lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd add a comment here explaining the purpose of these shifts. The comment above about arithmetic shift being used automatically really belongs here, but the more relevant information is that the is filling the upper bits with of the vector with the sign-bit of the 32-bit value in each lane.

lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
} else {
ty =
cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64);
cir::ConstantOp maskScalar = builder.getConstant(
loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
cir::VecSplatOp mask =
cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
lhs = builder.createBitcast(loc, ops[0], ty);
rhs = builder.createBitcast(loc, ops[1], ty);
lhs = builder.createAnd(loc, lhs, mask);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep the comment "// Clear the upper bits". It may seem obvious, but it's a useful hint as to the logic of this function.

rhs = builder.createAnd(loc, rhs, mask);
}
return builder.createMul(loc, lhs, rhs);
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
Expand Down Expand Up @@ -851,12 +892,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
case X86::BI__builtin_ia32_pmuludq512:
return emitX86Muldq(*this, expr, /*IsSigned*/ false, ops);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return emitX86Muldq(*this, expr, /*IsSigned*/ false, ops);
return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned=*/false, ops);

This suggestion reflects the change in signature suggested above, but it also aligns the isSigned= comment with clang conventions.

case X86::BI__builtin_ia32_pmuldq128:
case X86::BI__builtin_ia32_pmuldq256:
case X86::BI__builtin_ia32_pmuldq512:
return emitX86Muldq(*this, expr, /*IsSigned*/ true, ops);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return emitX86Muldq(*this, expr, /*IsSigned*/ true, ops);
return emitX86Muldq(builder, getLoc(expr->getExprLoc), /*isSigned=*/true, ops);

case X86::BI__builtin_ia32_pternlogd512_mask:
case X86::BI__builtin_ia32_pternlogq512_mask:
case X86::BI__builtin_ia32_pternlogd128_mask:
Expand Down
68 changes: 68 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CIR-LABEL: _mm256_mul_epu32
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<4 x !s64i>
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm256_mul_epu32
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm256_mul_epu32
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}

return _mm256_mul_epu32(a, b);
}

__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CIR-LABEL: _mm256_mul_epi32
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i>
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm256_mul_epi32
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm256_mul_epi32
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}

return _mm256_mul_epi32(a, b);
}
55 changes: 55 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,58 @@ __mmask16 test_kmov_w(__mmask16 A) {
// OGCG: bitcast <16 x i1> {{.*}} to i16
return __builtin_ia32_kmovw(A);
}


__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
// CIR-LABEL: _mm512_mul_epi32
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<8 x !s64i>
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm512_mul_epi32
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm512_mul_epi32
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}

return _mm512_mul_epi32(__A, __B);
}


__m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
// CIR-LABEL: _mm512_mul_epu32
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<8 x !s64i>
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm512_mul_epu32
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm512_mul_epu32
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}

return _mm512_mul_epu32(__A, __B);
}

23 changes: 23 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,26 @@ void test_mm_pause(void) {
// LLVM: call void @llvm.x86.sse2.pause()
// OGCG: call void @llvm.x86.sse2.pause()
}

__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CIR-LABEL: _mm_mul_epu32
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<2 x !s64i>
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])

// LLVM-LABEL: _mm_mul_epu32
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm_mul_epu32
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}

return _mm_mul_epu32(A, B);
}
45 changes: 45 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CIR-LABEL: _mm_mul_epi32
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i>
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])

// LLVM-LABEL: _mm_mul_epi32
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}

// OGCG-LABEL: _mm_mul_epi32
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}

return _mm_mul_epi32(x, y);
}