diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index b4351684fc418..276adcfc5c6be 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -878,10 +878,9 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { } if (ty.UseExcessPrecision(cgf.getContext())) { - if (ty->getAs<VectorType>()) { - assert(!cir::MissingFeatures::vectorType()); - cgf.cgm.errorNYI("getPromotionType: promotion to vector type"); - return QualType(); + if (auto *vt = ty->getAs<VectorType>()) { + unsigned numElements = vt->getNumElements(); + return ctx.getVectorType(ctx.FloatTy, numElements, vt->getVectorKind()); } return cgf.getContext().FloatTy; } @@ -2356,4 +2355,4 @@ mlir::Value CIRGenFunction::emitScalarPrePostIncDec(const UnaryOperator *e, bool isPre) { return ScalarExprEmitter(*this, builder) .emitScalarPrePostIncDec(e, lv, kind, isPre); -} +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 2231b5e0870de..287d016ff6d1e 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -13,6 +13,7 @@ typedef int vi3 __attribute__((ext_vector_type(3))); typedef int vi2 __attribute__((ext_vector_type(2))); typedef float vf4 __attribute__((ext_vector_type(4))); typedef double vd2 __attribute__((ext_vector_type(2))); +typedef _Float16 vh4 __attribute__((ext_vector_type(4))); vi4 vec_a; // CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i> @@ -1217,6 +1218,45 @@ void foo22() { // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16 +void foo24() { + vh4 a; + vh4 b; + vh4 c = a + b; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"] +// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, 
!cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16> +// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float> +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16> +// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float> +// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float> +// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16> +// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8 +// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8 +// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8 +// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float> +// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8 +// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float> +// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]] +// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half> +// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8 +// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float> +// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8 +// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float> +// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]] +// OGCG: %[[RESULT_VF16:.*]] = 
fptrunc <4 x float> %[[RESULT]] to <4 x half> +// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8 + void foo23() { vi4 a; vi4 b; diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d9206070d7455..d66c7a3d2aba6 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -12,6 +12,7 @@ typedef unsigned int uvi4 __attribute__((vector_size(16))); typedef float vf4 __attribute__((vector_size(16))); typedef double vd2 __attribute__((vector_size(16))); typedef long long vll2 __attribute__((vector_size(16))); +typedef _Float16 vh4 __attribute__((vector_size(8))); vi4 vec_a; // CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i> @@ -1259,6 +1260,45 @@ void foo25() { // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16 +void foo27() { + vh4 a; + vh4 b; + vh4 c = a + b; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"] +// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16> +// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float> +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16> +// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float> +// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float> +// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16> +// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, 
align 8 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8 +// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8 +// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8 +// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float> +// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8 +// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float> +// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]] +// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half> +// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8 +// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8 +// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float> +// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8 +// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float> +// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]] +// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half> +// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8 + void foo26() { vi4 a; vi4 b; @@ -1297,4 +1337,4 @@ void foo26() { // OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer // OGCG: %[[VEC_OR:.*]] = and <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]] // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32> -// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16 +// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16 \ No newline at end of file